diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_app.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_app.py index a50718d2e7..bd8eec1da9 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_app.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/kube_app.py @@ -8,14 +8,10 @@ import os import hashlib import pecan from pecan import rest -import shutil -import stat -import tempfile import wsme from wsme import types as wtypes import wsmeext.pecan as wsme_pecan -from contextlib import contextmanager from oslo_log import log from sysinv._i18n import _ from sysinv import objects @@ -34,17 +30,6 @@ import cgcs_patch.constants as patch_constants LOG = log.getLogger(__name__) -@contextmanager -def TempDirectory(): - tmpdir = tempfile.mkdtemp() - os.chmod(tmpdir, stat.S_IRWXU) - try: - yield tmpdir - finally: - LOG.debug("Cleaning up temp directory %s" % tmpdir) - shutil.rmtree(tmpdir) - - class KubeApp(base.APIBase): """API representation of a containerized application.""" @@ -165,7 +150,7 @@ class KubeAppController(rest.RestController): "{} has unrecognizable tar file extension. 
Supported " "extensions are: .tgz and .tar.gz.".format(app_tarfile)) - with TempDirectory() as app_path: + with cutils.TempDirectory() as app_path: if not cutils.extract_tarfile(app_path, app_tarfile): _handle_upload_failure( "failed to extract tar file {}.".format(os.path.basename(app_tarfile))) @@ -585,21 +570,38 @@ class KubeAppHelper(object): raise exception.SysinvException(_( "Patching operation is in progress.")) - def _check_patch_is_applied(self, patches): + def _check_required_patches_are_applied(self, patches=None): + """Validates that each patch provided is applied on the system""" + if patches is None: + patches = [] try: system = self._dbapi.isystem_get_one() - response = patch_api.patch_is_applied( + response = patch_api.patch_query( token=None, timeout=constants.PATCH_DEFAULT_TIMEOUT_IN_SECS, - region_name=system.region_name, - patches=patches + region_name=system.region_name ) except Exception as e: LOG.error(e) raise exception.SysinvException(_( "Error while querying patch-controller for the " "state of the patch(es).")) - return response + query_patches = response['pd'] + applied_patches = [] + for patch_key in query_patches: + patch = query_patches[patch_key] + patchstate = patch.get('patchstate', None) + if patchstate == patch_constants.APPLIED or \ + patchstate == patch_constants.COMMITTED: + applied_patches.append(patch_key) + + missing_patches = [] + for required_patch in patches: + if required_patch not in applied_patches: + missing_patches.append(required_patch) + + success = not missing_patches + return success, missing_patches def _patch_report_app_dependencies(self, name, patches=None): if patches is None: @@ -659,10 +661,12 @@ class KubeAppHelper(object): raise exception.SysinvException(_( "Application-upload rejected: manifest file is missing.")) - def _verify_metadata_file(self, app_path, app_name, app_version): + def _verify_metadata_file(self, app_path, app_name, app_version, + upgrade_from_release=None): try: name, version, patches 
= cutils.find_metadata_file( - app_path, constants.APP_METADATA_FILE) + app_path, constants.APP_METADATA_FILE, + upgrade_from_release=upgrade_from_release) except exception.SysinvException as e: raise exception.SysinvException(_( "metadata validation failed. {}".format(e))) @@ -673,8 +677,8 @@ class KubeAppHelper(object): version = app_version if (not name or not version or - name == constants.APP_VERSION_PLACEHOLDER or - version == constants.APP_VERSION_PLACEHOLDER): + name.startswith(constants.APP_VERSION_PLACEHOLDER) or + version.startswith(constants.APP_VERSION_PLACEHOLDER)): raise exception.SysinvException(_( "application name or/and version is/are not included " "in the tar file. Please specify the application name " @@ -692,16 +696,19 @@ class KubeAppHelper(object): "{}. Communication Error with patching subsytem. " "Preventing application upload.".format(e))) - applied = self._check_patch_is_applied(patches) + applied, missing_patches = \ + self._check_required_patches_are_applied(patches) if not applied: raise exception.SysinvException(_( - "the required patch(es) for application {} ({}) " - "must be applied".format(name, version))) + "the required patch(es) ({}) for application {} ({}) " + "must be applied".format(', '.join(missing_patches), + name, version))) LOG.info("The required patch(es) for application {} ({}) " "has/have applied.".format(name, version)) else: - LOG.info("No patch required for application {} ({}).".format(name, version)) + LOG.info("No patch required for application {} ({})." 
+ "".format(name, version)) return name, version, patches diff --git a/sysinv/sysinv/sysinv/sysinv/common/constants.py b/sysinv/sysinv/sysinv/sysinv/common/constants.py index 66ac83575f..6fed568d8d 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/constants.py +++ b/sysinv/sysinv/sysinv/sysinv/common/constants.py @@ -1614,6 +1614,13 @@ APP_METADATA_DESIRED_STATE = 'desired_state' APP_METADATA_DESIRED_STATES = 'desired_states' APP_METADATA_FORBIDDEN_MANUAL_OPERATIONS = 'forbidden_manual_operations' APP_METADATA_ORDERED_APPS = 'ordered_apps' +APP_METADATA_UPGRADES = 'upgrades' +APP_METADATA_UPDATE_FAILURE_NO_ROLLBACK = 'update_failure_no_rollback' +APP_METADATA_FROM_VERSIONS = 'from_versions' +APP_METADATA_SUPPORTED_K8S_VERSION = 'supported_k8s_version' +APP_METADATA_SUPPORTED_RELEASES = 'supported_releases' +APP_METADATA_MINIMUM = 'minimum' +APP_METADATA_MAXIMUM = 'maximum' APP_EVALUATE_REAPPLY_TYPE_HOST_ADD = 'host-add' APP_EVALUATE_REAPPLY_TYPE_HOST_DELETE = 'host-delete' diff --git a/sysinv/sysinv/sysinv/sysinv/common/health.py b/sysinv/sysinv/sysinv/sysinv/common/health.py index 29f83dbda5..4522e217e8 100755 --- a/sysinv/sysinv/sysinv/sysinv/common/health.py +++ b/sysinv/sysinv/sysinv/sysinv/common/health.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018-2020 Wind River Systems, Inc. +# Copyright (c) 2018-2021 Wind River Systems, Inc. 
# # SPDX-License-Identifier: Apache-2.0 # @@ -10,6 +10,7 @@ from oslo_log import log from sysinv._i18n import _ from sysinv.common import ceph from sysinv.common import constants +from sysinv.common import exception from sysinv.common import kubernetes from sysinv.common import utils from sysinv.common.fm import fmclient @@ -118,6 +119,10 @@ class Health(object): return success, allowed, affecting + def _check_active_is_controller_0(self): + """Checks that active controller is controller-0""" + return utils.get_local_controller_hostname() == constants.CONTROLLER_0_HOSTNAME + def get_alarms_degrade(self, context, alarm_ignore_list=None, entity_instance_id_filter=""): """Return all the alarms that cause the degrade""" @@ -157,11 +162,22 @@ class Health(object): return True - def _check_required_patches(self, patch_list): + def _check_required_patches_are_applied(self, patches=None): """Validates that each patch provided is applied on the system""" - system = self._dbapi.isystem_get_one() - response = patch_api.patch_query(token=None, timeout=60, - region_name=system.region_name) + if patches is None: + patches = [] + try: + system = self._dbapi.isystem_get_one() + response = patch_api.patch_query( + token=None, + timeout=constants.PATCH_DEFAULT_TIMEOUT_IN_SECS, + region_name=system.region_name + ) + except Exception as e: + LOG.error(e) + raise exception.SysinvException(_( + "Error while querying sw-patch-controller for the " + "state of the patch(es).")) query_patches = response['pd'] applied_patches = [] for patch_key in query_patches: @@ -172,7 +188,7 @@ class Health(object): applied_patches.append(patch_key) missing_patches = [] - for required_patch in patch_list: + for required_patch in patches: if required_patch not in applied_patches: missing_patches.append(required_patch) @@ -372,6 +388,8 @@ class Health(object): # A load is imported # The load patch requirements are met # The license is valid for the N+1 load + # All kubernetes applications are in a stable 
state + # Package metadata criteria are met system_mode = self._dbapi.isystem_get_one().system_mode simplex = (system_mode == constants.SYSTEM_MODE_SIMPLEX) @@ -393,7 +411,8 @@ class Health(object): else: patches = [] - success, missing_patches = self._check_required_patches(patches) + success, missing_patches = \ + self._check_required_patches_are_applied(patches) output += _('Required patches are applied: [%s]\n') \ % (Health.SUCCESS_MSG if success else Health.FAIL_MSG) if not success: @@ -433,6 +452,26 @@ class Health(object): health_ok = health_ok and success + success, apps_not_valid = self._check_kube_applications() + output += _( + 'All kubernetes applications are in a valid state: [%s]\n') \ + % (Health.SUCCESS_MSG if success else Health.FAIL_MSG) + if not success: + output += _('Kubernetes applications not in a valid state: %s\n') \ + % ', '.join(apps_not_valid) + + health_ok = health_ok and success + + # The load is only imported to controller-0. An upgrade can only + # be started when controller-0 is active. + is_controller_0 = self._check_active_is_controller_0() + success = is_controller_0 + output += \ + _('Active controller is controller-0: [%s]\n') \ + % (Health.SUCCESS_MSG if success else Health.FAIL_MSG) + + health_ok = health_ok and success + return health_ok, output def get_system_health_kube_upgrade(self, diff --git a/sysinv/sysinv/sysinv/sysinv/common/utils.py b/sysinv/sysinv/sysinv/sysinv/common/utils.py index f8ff7675a9..2b678f7355 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/utils.py +++ b/sysinv/sysinv/sysinv/sysinv/common/utils.py @@ -53,6 +53,7 @@ import shutil import signal import six import socket +import stat import string import tempfile import time @@ -1894,17 +1895,26 @@ def verify_checksum(path): return rc -def find_metadata_file(path, metadata_file): +def find_metadata_file(path, metadata_file, upgrade_from_release=None): """ Find and validate the metadata file in a given directory. 
Valid keys for metadata file are defined in the following format: app_name: app_version: - patch_dependencies: - - - - - ... + upgrades: + update_failure_no_rollback: + from_versions: + - + - + supported_k8s_version: + minimum: + maximum: + supported_releases: + : + - + - + ... repo: - optional: defaults to HELM_REPO_FOR_APPS disabled_charts: - optional: charts default to enabled - @@ -1958,7 +1968,6 @@ def find_metadata_file(path, metadata_file): doc = yaml.safe_load(f) app_name = doc['app_name'] app_version = doc['app_version'] - patches = doc['patch_dependencies'] except KeyError: # metadata file does not have the key(s) pass @@ -1969,11 +1978,6 @@ def find_metadata_file(path, metadata_file): "Invalid %s: app_name or/and app_version " "is/are None." % metadata_file)) - if not isinstance(patches, list): - raise exception.SysinvException(_( - "Invalid %s: patch_dependencies should " - "be a list." % metadata_file)) - behavior = None evaluate_reapply = None triggers = None @@ -2098,6 +2102,127 @@ def find_metadata_file(path, metadata_file): except KeyError: pass + upgrades = None + from_versions = [] + + try: + upgrades = doc[constants.APP_METADATA_UPGRADES] + if not isinstance(upgrades, dict): + raise exception.SysinvException(_( + "Invalid {}: {} should be a dict." + "".format(metadata_file, + constants.APP_METADATA_UPGRADES))) + except KeyError: + pass + + if upgrades: + try: + no_rollback = \ + upgrades[constants.APP_METADATA_UPDATE_FAILURE_NO_ROLLBACK] + if not is_valid_boolstr(no_rollback): + raise exception.SysinvException(_( + "Invalid {}: {} expected value is a boolean string." + "".format(metadata_file, + constants.APP_METADATA_UPDATE_FAILURE_NO_ROLLBACK))) + except KeyError: + pass + + try: + from_versions = upgrades[constants.APP_METADATA_FROM_VERSIONS] + if not isinstance(from_versions, list): + raise exception.SysinvException(_( + "Invalid {}: {} should be a list." 
+ "".format(metadata_file, + constants.APP_METADATA_FROM_VERSIONS))) + except KeyError: + pass + + for version in from_versions: + if not isinstance(version, six.string_types): + raise exception.SysinvException(_( + "Invalid {}: {} each version should be {}." + "".format(metadata_file, + constants.APP_METADATA_FROM_VERSIONS, + six.string_types))) + + k8s_version = None + + try: + k8s_version = doc[constants.APP_METADATA_SUPPORTED_K8S_VERSION] + if not isinstance(k8s_version, dict): + raise exception.SysinvException(_( + "Invalid {}: {} should be a dict." + "".format(metadata_file, + constants.APP_METADATA_SUPPORTED_K8S_VERSION))) + except KeyError: + pass + + if k8s_version: + try: + _minimum = k8s_version[constants.APP_METADATA_MINIMUM] + if not isinstance(_minimum, six.string_types): + raise exception.SysinvException(_( + "Invalid {}: {} should be {}." + "".format(metadata_file, + constants.APP_METADATA_MINIMUM, + six.string_types))) + except KeyError: + pass + + try: + _maximum = k8s_version[constants.APP_METADATA_MAXIMUM] + if not isinstance(_maximum, six.string_types): + raise exception.SysinvException(_( + "Invalid {}: {} should be {}." + "".format(metadata_file, + constants.APP_METADATA_MAXIMUM, + six.string_types))) + except KeyError: + pass + + supported_releases = {} + try: + supported_releases = doc[constants.APP_METADATA_SUPPORTED_RELEASES] + if not isinstance(supported_releases, dict): + raise exception.SysinvException(_( + "Invalid {}: {} should be a dict." + "".format(metadata_file, + constants.APP_METADATA_SUPPORTED_RELEASES))) + except KeyError: + pass + + if upgrade_from_release is None: + check_release = get_sw_version() + else: + check_release = upgrade_from_release + for release, release_patches in supported_releases.items(): + if not isinstance(release, six.string_types): + raise exception.SysinvException(_( + "Invalid {}: {} release key should be {}." 
+ "".format(metadata_file, + constants.APP_METADATA_SUPPORTED_RELEASES, + six.string_types))) + if not isinstance(release_patches, list): + raise exception.SysinvException(_( + "Invalid {}: {} : [, ...] " + "patches should be a list." + "".format(metadata_file, + constants.APP_METADATA_SUPPORTED_RELEASES))) + for patch in release_patches: + if not isinstance(patch, six.string_types): + raise exception.SysinvException(_( + "Invalid {}: {} : [, ...] " + "each patch should be {}." + "".format(metadata_file, + constants.APP_METADATA_SUPPORTED_RELEASES, + six.string_types))) + if release == check_release: + patches.extend(release_patches) + LOG.info('{}, application {} ({}), ' + 'check_release {}, requires patches {}' + ''.format(metadata_file, app_name, app_version, + check_release, release_patches)) + return app_name, app_version, patches @@ -2664,3 +2789,31 @@ def get_upgradable_hosts(dbapi): hosts = [i for i in all_hosts if i.personality != constants.EDGEWORKER] return hosts + + +def deep_get(nested_dict, keys, default=None): + """Get a value from nested dictionary.""" + if not isinstance(nested_dict, dict): + raise exception.SysinvException(_( + "Expected a dictionary, cannot get keys {}.".format(keys))) + + def _reducer(d, key): + if isinstance(d, dict): + return d.get(key, default) + return default + + return functools.reduce(_reducer, keys, nested_dict) + + +@contextlib.contextmanager +def TempDirectory(): + tmpdir = tempfile.mkdtemp() + os.chmod(tmpdir, stat.S_IRWXU) + try: + yield tmpdir + finally: + try: + LOG.debug("Cleaning up temp directory %s" % tmpdir) + shutil.rmtree(tmpdir) + except OSError as e: + LOG.error(_('Could not remove tmpdir: %s'), str(e)) diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py b/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py index a60118977a..7acded1843 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py @@ -30,6 +30,7 @@ import time import zipfile 
from collections import namedtuple +from distutils.util import strtobool from eventlet import greenpool from eventlet import greenthread from eventlet import queue @@ -1252,21 +1253,54 @@ class AppOperator(object): except Exception as e: LOG.exception(e) - def _get_metadata_value(self, app, flag, default): - # This function gets a boolean - # parameter from application metadata - flag_result = default + def _get_metadata_value(self, app, key_or_keys, default=None, + enforce_type=False): + """ + Get application metadata value from nested dictionary. + + If a default value is specified, this will enforce that + the value returned is of the same type. + + :param app: application object + :param key_or_keys: single key string, or list of keys + :param default: default value (and type) + :param enforce_type: enforce type check between return value and default + + :return: The value from nested dictionary D[key1][key2][...] = value + assuming all keys are present, otherwise default. + """ + value = default + + if isinstance(key_or_keys, list): + keys = key_or_keys + else: + keys = [key_or_keys] + metadata_file = os.path.join(app.inst_path, constants.APP_METADATA_FILE) if os.path.exists(metadata_file) and os.path.getsize(metadata_file) > 0: with open(metadata_file, 'r') as f: try: - y = yaml.safe_load(f) - flag_result = y.get(flag, default) + metadata = yaml.safe_load(f) or {} + value = cutils.deep_get(metadata, keys, default=default) + # TODO(jgauld): There is inconsistent treatment of YAML + # boolean between the module ruamel.yaml and module yaml + # in utils.py, health.py, and kube_app.py. Until these + # usage variants are unified, leave the following check + # as optional. + if enforce_type and default is not None and value is not None: + default_type = type(default) + if type(value) != default_type: + raise exception.SysinvException(_( + "Invalid {}: {} {!r} expected value is {}." 
+ "".format(metadata_file, '.'.join(keys), + value, default_type))) except KeyError: # metadata file does not have the key pass - return flag_result + LOG.debug('_get_metadata_value: metadata_file=%s, keys=%s, default=%r, value=%r', + metadata_file, keys, default, value) + return value def _preserve_user_overrides(self, from_app, to_app): """Dump user overrides @@ -1610,7 +1644,7 @@ class AppOperator(object): LOG.error("Application %s recover to version %s aborted!" % (old_app.name, old_app.version)) - def _perform_app_rollback(self, from_app, to_app): + def _perform_app_rollback(self, from_app, to_app, no_rollback): """Perform application rollback request This method invokes Armada to rollback the application releases to @@ -1619,10 +1653,18 @@ class AppOperator(object): :param from_app: application object that application updating from :param to_app: application object that application updating to + :param no_rollback: boolean: whether application should skip rollback :return boolean: whether application rollback was successful """ LOG.info("Application %s (%s) rollback started." % (to_app.name, to_app.version)) + if no_rollback: + LOG.info("Application %s (%s) has configured no_rollback %s, " + "rollback skipped.", + to_app.name, to_app.version, no_rollback) + # Assume application not aborted. The subsequent success path will + # cleanup the from_app. 
+ return True try: if AppOperator.is_app_aborted(to_app.name): @@ -2455,7 +2497,10 @@ class AppOperator(object): self._plugins.activate_plugins(to_app) # lifecycle hooks not used in perform_app_rollback - result = self._perform_app_rollback(from_app, to_app) + keys = [constants.APP_METADATA_UPGRADES, + constants.APP_METADATA_UPDATE_FAILURE_NO_ROLLBACK] + no_rollback = bool(strtobool(str(self._get_metadata_value(to_app, keys, False)))) + result = self._perform_app_rollback(from_app, to_app, no_rollback) if not result: LOG.error("Application %s update from version %s to version " diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index 0ccf38e1cf..6a4182c1ce 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -5591,7 +5591,7 @@ class ConductorManager(service.PeriodicService): tarball_name = '{}/{}'.format( constants.HELM_APP_ISO_INSTALL_PATH, tarfiles[0]) - with kube_api.TempDirectory() as app_path: + with cutils.TempDirectory() as app_path: if not cutils.extract_tarfile(app_path, tarball_name): LOG.error("Failed to extract tar file {}.".format( os.path.basename(tarball_name))) @@ -5691,7 +5691,7 @@ class ConductorManager(service.PeriodicService): tarball_name = '{}/{}'.format( constants.HELM_APP_ISO_INSTALL_PATH, tarfile) - with kube_api.TempDirectory() as app_path: + with cutils.TempDirectory() as app_path: if not cutils.extract_tarfile(app_path, tarball_name): LOG.error("Failed to extract tar file {}.".format( os.path.basename(tarball_name))) diff --git a/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/api.py b/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/api.py index e5319104f1..ab40146ec4 100644 --- a/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/api.py +++ b/sysinv/sysinv/sysinv/sysinv/db/sqlalchemy/api.py @@ -7716,7 +7716,7 @@ class Connection(api.Connection): count = query.update(values, synchronize_session='fetch') if count == 0: - raise 
exception.KubeAppNotFound(values['name']) + raise exception.KubeAppNotFound(name=values.get('name')) return query.one() def kube_app_destroy(self, name, version=None, inactive=False):