diff --git a/sysinv/sysinv/centos/build_srpm.data b/sysinv/sysinv/centos/build_srpm.data index 84c0634de1..28d3efc66d 100644 --- a/sysinv/sysinv/centos/build_srpm.data +++ b/sysinv/sysinv/centos/build_srpm.data @@ -1,2 +1,2 @@ SRC_DIR="sysinv" -TIS_PATCH_VER=332 +TIS_PATCH_VER=333 diff --git a/sysinv/sysinv/sysinv/sysinv/common/image_versions.py b/sysinv/sysinv/sysinv/sysinv/common/image_versions.py new file mode 100644 index 0000000000..254fa3e347 --- /dev/null +++ b/sysinv/sysinv/sysinv/sysinv/common/image_versions.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2019 Wind River Systems, Inc. +# +# SPDX-License-Identifier: Apache-2.0 +# + +TILLER_CONTAINER_NAME = 'tiller' +TILLER_SELECTOR_NAME = 'tiller' +TILLER_IMAGE_NAME = '/kubernetes-helm/tiller' +TILLER_IMAGE_VERSION = "v2.13.1" + +ARMADA_IMAGE_NAME = '/airshipit/armada' +ARMADA_IMAGE_VERSION = "8a1638098f88d92bf799ef4934abe569789b885e-ubuntu_bionic" diff --git a/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py b/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py index db0b4c304b..360bd1ce16 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py +++ b/sysinv/sysinv/sysinv/sysinv/common/kubernetes.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2013-2018 Wind River Systems, Inc. +# Copyright (c) 2013-2019 Wind River Systems, Inc. # # SPDX-License-Identifier: Apache-2.0 # @@ -84,6 +84,23 @@ class KubeOperator(object): LOG.error("Kubernetes exception in kube_get_nodes: %s" % e) raise + def kube_get_image_by_selector(self, template_name, namespace, container_name): + LOG.debug("kube_get_image_by_selector template_name=%s, namespace=%s" % + (template_name, namespace)) + try: + # Retrieve the named pod. 
+ api_response = self._get_kubernetesclient_core().list_namespaced_pod( + namespace, label_selector="name=%s" % template_name) + for pod in api_response.items: + if template_name in pod.metadata.name: + for container in pod.spec.containers: + if container.name == container_name: + return container.image + return None + except ApiException as e: + LOG.error("Kubernetes exception in list_namespaced_pod: %s" % e) + raise + def kube_create_namespace(self, namespace): body = {'metadata': {'name': namespace}} diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py b/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py index 02c541a2c6..564735187a 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/kube_app.py @@ -32,12 +32,12 @@ from eventlet import queue from eventlet import Timeout from fm_api import constants as fm_constants from fm_api import fm_api -from oslo_config import cfg from oslo_log import log as logging from sysinv.api.controllers.v1 import kube_app from sysinv.common import constants from sysinv.common import exception from sysinv.common import kubernetes +from sysinv.common import image_versions from sysinv.common import utils as cutils from sysinv.common.storage_backend_conf import K8RbdProvisioner from sysinv.conductor import openstack @@ -49,15 +49,6 @@ from sysinv.openstack.common.gettextutils import _ # Log and config LOG = logging.getLogger(__name__) -kube_app_opts = [ - cfg.StrOpt('armada_image_tag', - default=('quay.io/airshipit/armada:' - '8a1638098f88d92bf799ef4934abe569789b885e-ubuntu_bionic'), - help='Docker image tag of Armada.'), - ] -CONF = cfg.CONF -CONF.register_opts(kube_app_opts) - # Constants APPLY_SEARCH_PATTERN = 'Processing Chart,' @@ -2532,6 +2523,9 @@ class DockerHelper(object): if registry: return registry + '/' + img_name, registry_auth return pub_img_tag, registry_auth + elif registry_name == registry_info['registry_replaced']: + registry_auth = 
registry_info['registry_auth'] + return pub_img_tag, registry_auth # If extracted registry_name is none of k8s.gcr.io, gcr.io, # quay.io and docker.io or no registry_name specified in image @@ -2584,10 +2578,25 @@ class DockerHelper(object): overrides_dir: {'bind': '/overrides', 'mode': 'ro'}, logs_dir: {'bind': ARMADA_CONTAINER_LOG_LOCATION, 'mode': 'rw'}} - armada_image = client.images.list(CONF.armada_image_tag) + quay_registry_url = self._dbapi.service_parameter_get_all( + service=constants.SERVICE_TYPE_DOCKER, + section=constants.SERVICE_PARAM_SECTION_DOCKER_QUAY_REGISTRY, + name=constants.SERVICE_PARAM_NAME_DOCKER_URL) + + if quay_registry_url: + quay_url = quay_registry_url[0].value + else: + quay_url = constants.DEFAULT_DOCKER_QUAY_REGISTRY + + armada_image_tag = quay_url + \ + image_versions.ARMADA_IMAGE_NAME + ":" + \ + image_versions.ARMADA_IMAGE_VERSION + + armada_image = client.images.list(armada_image_tag) + # Pull Armada image if it's not available if not armada_image: - LOG.info("Downloading Armada image %s ..." % CONF.armada_image_tag) + LOG.info("Downloading Armada image %s ..." % armada_image_tag) quay_registry_secret = self._dbapi.service_parameter_get_all( service=constants.SERVICE_TYPE_DOCKER, @@ -2599,12 +2608,12 @@ class DockerHelper(object): else: quay_registry_auth = None - client.images.pull(CONF.armada_image_tag, + client.images.pull(armada_image_tag, auth_config=quay_registry_auth) - LOG.info("Armada image %s downloaded!" % CONF.armada_image_tag) + LOG.info("Armada image %s downloaded!" 
% armada_image_tag) container = client.containers.run( - CONF.armada_image_tag, + armada_image_tag, name=ARMADA_CONTAINER_NAME, detach=True, volumes=binds, diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index 06c2af8c8d..dbb4895aaa 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -74,6 +74,7 @@ from sysinv.api.controllers.v1 import vim_api from sysinv.common import constants from sysinv.common import ceph as cceph from sysinv.common import exception +from sysinv.common import image_versions from sysinv.common import fm from sysinv.common import fernet from sysinv.common import health @@ -102,6 +103,7 @@ from sysinv.puppet import common as puppet_common from sysinv.puppet import puppet from sysinv.helm import common as helm_common from sysinv.helm import helm +from sysinv.helm import utils as helm_utils MANAGER_TOPIC = 'sysinv.conductor_manager' @@ -213,6 +215,7 @@ class ConductorManager(service.PeriodicService): # until host unlock and we need ceph-mon up in order to configure # ceph for the initial unlock. 
 self._app = kube_app.AppOperator(self.dbapi) + self._docker = kube_app.DockerHelper(self.dbapi) + self._ceph = iceph.CephOperator(self.dbapi) + self._helm = helm.HelmOperator(self.dbapi) + self._kube = kubernetes.KubeOperator(self.dbapi) @@ -224,6 +227,9 @@ class ConductorManager(service.PeriodicService): self._handle_restore_in_progress() + # Upgrade/Downgrade tiller if required + greenthread.spawn(self._upgrade_downgrade_tiller) + LOG.info("sysinv-conductor start committed system=%s" % system.as_dict()) @@ -5118,6 +5124,76 @@ class ConductorManager(service.PeriodicService): return self.reapply_app(context, app_name) + def _upgrade_downgrade_tiller(self): + """Check if tiller needs to be upgraded or downgraded""" + LOG.info("_upgrade_downgrade_tiller") + + FIVE_MIN_IN_SECS = 300 + + try: + running_image = self._kube.kube_get_image_by_selector( + image_versions.TILLER_SELECTOR_NAME, + helm_common.HELM_NS_KUBE_SYSTEM, + image_versions.TILLER_CONTAINER_NAME) + + if running_image is None: + LOG.warning("Failed to get tiller image") + return + + LOG.info("Running tiller image: %s" % running_image) + + # Grab the version from the image name. Version is preceded + # by a ":" e.g. + # gcr.io/kubernetes-helm/tiller:v2.13.0 + running_image_name = running_image.split(":")[0] + running_version = running_image.split(":")[1] + if not running_version: + LOG.warning("Failed to get version from tiller image") + return + + # Verify the tiller version running + if running_version != image_versions.TILLER_IMAGE_VERSION: + + LOG.info("Running version of tiller does not match patching version of %s. " + "Upgrade in progress." 
+ % image_versions.TILLER_IMAGE_VERSION) + download_image = running_image_name + ":" + image_versions.TILLER_IMAGE_VERSION + local_registry_auth = kube_app.get_local_docker_registry_auth() + self._docker._retrieve_specified_registries() + + # download the image, retry if it fails + while True: + try: + ret = self._docker.download_an_image("helm", + local_registry_auth, + download_image) + if not ret: + raise Exception + except Exception as e: + LOG.warning( + "Failed to download image '%s'. %s" % + (download_image, e)) + greenthread.sleep(FIVE_MIN_IN_SECS) + continue + break + + # reset the cached registries + self._docker._reset_registries_info() + + # Update the new image, retry if it fails + while True: + try: + helm_utils.helm_upgrade_tiller(download_image) + + except Exception as e: + LOG.warning("Failed to update the new image: %s" % e) + greenthread.sleep(FIVE_MIN_IN_SECS) + continue + break + + except Exception as e: + LOG.error("{}. Failed to upgrade/downgrade tiller.".format(e)) + def check_nodes_stable(self): hosts = self.dbapi.ihost_get_list() if (utils.is_host_simplex_controller(hosts[0]) and diff --git a/sysinv/sysinv/sysinv/sysinv/helm/utils.py b/sysinv/sysinv/sysinv/sysinv/helm/utils.py index b0951983ac..98e97d04bb 100644 --- a/sysinv/sysinv/sysinv/sysinv/helm/utils.py +++ b/sysinv/sysinv/sysinv/sysinv/helm/utils.py @@ -128,3 +128,31 @@ def get_openstack_pending_install_charts(): except Exception as e: raise exception.HelmTillerFailure( reason="Failed to obtain pending charts list: %s" % e) + + +def helm_upgrade_tiller(image): + LOG.info("Attempt to update image to %s" % image) + try: + + # Adding temporary workaround using helm init command with + # sed command until helm and tiller provide a fix for + # https://github.com/helm/helm/issues/6374 + workaround = '| sed "s@apiVersion: extensions/v1beta1@apiVersion: apps/v1@" ' \ + '| kubectl --kubeconfig /etc/kubernetes/admin.conf apply -f -' + + cmd = '{} {} {}'.format( + 'helm init --upgrade 
--kubeconfig /etc/kubernetes/admin.conf --tiller-image', + image, + workaround) + + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) + out, err = proc.communicate() + if proc.returncode != 0: + raise exception.HelmTillerFailure( + reason="Failed to upgrade/downgrade image: %s" % err) + + LOG.info("Image was updated to %s" % image) + + except Exception as e: + raise exception.HelmTillerFailure( + reason="Failed to upgrade/downgrade image: %s" % e)