Tiller upgrade prep

This commit spawns a new greenthread inside the _start() routine of
the conductor. The thread calls the new _upgrade_downgrade_tiller()
method, which checks whether the tiller image version has changed and,
if so, upgrades or downgrades the image. If downloading or updating
the new image fails, the code sleeps for 5 minutes and tries again.

Removes references to the armada image name from sysinv.conf. Image
versions will now be stored in sysinv/common/image_versions.py.

Story: 2006590
Task: 36725
Signed-off-by: Kristine Bujold <kristine.bujold@windriver.com>
Change-Id: I3b3037fef25d2b37c98c4dd7e82b405b41a45a23
Kristine Bujold 2019-09-24 12:47:46 -04:00
parent 8f648c2a36
commit 18d22a950f
6 changed files with 160 additions and 17 deletions
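
In outline, the retry behaviour described in the commit message amounts to a background greenthread that keeps retrying each step with a fixed back-off. Below is a minimal, self-contained sketch of that pattern only, not the sysinv code itself; pull_image and upgrade_tiller are placeholder callables and RETRY_INTERVAL_SECS is an illustrative name:

    import eventlet

    RETRY_INTERVAL_SECS = 300  # wait 5 minutes between attempts

    def upgrade_downgrade_sketch(running_version, target_version,
                                 pull_image, upgrade_tiller):
        # Nothing to do when the running tiller image already matches.
        if running_version == target_version:
            return
        # Retry the image download until it succeeds.
        while True:
            try:
                pull_image(target_version)
                break
            except Exception:
                eventlet.greenthread.sleep(RETRY_INTERVAL_SECS)
        # Retry the tiller upgrade/downgrade until it succeeds.
        while True:
            try:
                upgrade_tiller(target_version)
                break
            except Exception:
                eventlet.greenthread.sleep(RETRY_INTERVAL_SECS)

    # The conductor would run this in the background so startup is not blocked, e.g.:
    # eventlet.greenthread.spawn(upgrade_downgrade_sketch, "v2.13.0", "v2.13.1",
    #                            pull_image, upgrade_tiller)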

View File

@@ -1,2 +1,2 @@
 SRC_DIR="sysinv"
-TIS_PATCH_VER=332
+TIS_PATCH_VER=333

View File

@@ -0,0 +1,13 @@
+#
+# Copyright (c) 2019 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+TILLER_CONTAINER_NAME = 'tiller'
+TILLER_SELECTOR_NAME = 'tiller'
+TILLER_IMAGE_NAME = '/kubernetes-helm/tiller'
+TILLER_IMAGE_VERSION = "v2.13.1"
+
+ARMADA_IMAGE_NAME = '/airshipit/armada'
+ARMADA_IMAGE_VERSION = "8a1638098f88d92bf799ef4934abe569789b885e-ubuntu_bionic"
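
The image names above start with a '/' so that a registry URL can be prepended at run time (the kube_app.py change below builds the Armada tag exactly this way). A rough, hypothetical illustration of the composition; the compose_image_tag helper and the registry URLs are examples only:

    TILLER_IMAGE_NAME = '/kubernetes-helm/tiller'
    TILLER_IMAGE_VERSION = "v2.13.1"
    ARMADA_IMAGE_NAME = '/airshipit/armada'
    ARMADA_IMAGE_VERSION = "8a1638098f88d92bf799ef4934abe569789b885e-ubuntu_bionic"

    def compose_image_tag(registry_url, image_name, image_version):
        # e.g. "quay.io" + "/airshipit/armada" + ":" + "<version>"
        return registry_url + image_name + ":" + image_version

    print(compose_image_tag("gcr.io", TILLER_IMAGE_NAME, TILLER_IMAGE_VERSION))
    # gcr.io/kubernetes-helm/tiller:v2.13.1
    print(compose_image_tag("quay.io", ARMADA_IMAGE_NAME, ARMADA_IMAGE_VERSION))
    # quay.io/airshipit/armada:8a1638098f88d92bf799ef4934abe569789b885e-ubuntu_bionic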

View File

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2013-2018 Wind River Systems, Inc.
+# Copyright (c) 2013-2019 Wind River Systems, Inc.
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -84,6 +84,23 @@ class KubeOperator(object):
             LOG.error("Kubernetes exception in kube_get_nodes: %s" % e)
             raise

+    def kube_get_image_by_selector(self, template_name, namespace, container_name):
+        LOG.debug("kube_get_image_by_selector template_name=%s, namespace=%s" %
+                  (template_name, namespace))
+        try:
+            # Retrieve the named pod.
+            api_response = self._get_kubernetesclient_core().list_namespaced_pod(
+                namespace, label_selector="name=%s" % template_name)
+            for pod in api_response.items:
+                if template_name in pod.metadata.name:
+                    for container in pod.spec.containers:
+                        if container.name == container_name:
+                            return container.image
+            return None
+        except ApiException as e:
+            LOG.error("Kubernetes exception in list_namespaced_pod: %s" % e)
+            raise
+
     def kube_create_namespace(self, namespace):
         body = {'metadata': {'name': namespace}}

View File

@@ -32,12 +32,12 @@ from eventlet import queue
 from eventlet import Timeout
 from fm_api import constants as fm_constants
 from fm_api import fm_api
-from oslo_config import cfg
 from oslo_log import log as logging
 from sysinv.api.controllers.v1 import kube_app
 from sysinv.common import constants
 from sysinv.common import exception
 from sysinv.common import kubernetes
+from sysinv.common import image_versions
 from sysinv.common import utils as cutils
 from sysinv.common.storage_backend_conf import K8RbdProvisioner
 from sysinv.conductor import openstack
@@ -49,15 +49,6 @@ from sysinv.openstack.common.gettextutils import _
 # Log and config
 LOG = logging.getLogger(__name__)

-kube_app_opts = [
-    cfg.StrOpt('armada_image_tag',
-               default=('quay.io/airshipit/armada:'
-                        '8a1638098f88d92bf799ef4934abe569789b885e-ubuntu_bionic'),
-               help='Docker image tag of Armada.'),
-]
-CONF = cfg.CONF
-CONF.register_opts(kube_app_opts)
-
 # Constants
 APPLY_SEARCH_PATTERN = 'Processing Chart,'
@@ -2532,6 +2523,9 @@ class DockerHelper(object):
                 if registry:
                     return registry + '/' + img_name, registry_auth
                 return pub_img_tag, registry_auth
+            elif registry_name == registry_info['registry_replaced']:
+                registry_auth = registry_info['registry_auth']
+                return pub_img_tag, registry_auth

         # If extracted registry_name is none of k8s.gcr.io, gcr.io,
         # quay.io and docker.io or no registry_name specified in image
@@ -2584,10 +2578,25 @@ class DockerHelper(object):
                      overrides_dir: {'bind': '/overrides', 'mode': 'ro'},
                      logs_dir: {'bind': ARMADA_CONTAINER_LOG_LOCATION, 'mode': 'rw'}}

-            armada_image = client.images.list(CONF.armada_image_tag)
+            quay_registry_url = self._dbapi.service_parameter_get_all(
+                service=constants.SERVICE_TYPE_DOCKER,
+                section=constants.SERVICE_PARAM_SECTION_DOCKER_QUAY_REGISTRY,
+                name=constants.SERVICE_PARAM_NAME_DOCKER_URL)
+
+            if quay_registry_url:
+                quay_url = quay_registry_url[0].value
+            else:
+                quay_url = constants.DEFAULT_DOCKER_QUAY_REGISTRY
+
+            armada_image_tag = quay_url + \
+                image_versions.ARMADA_IMAGE_NAME + ":" + \
+                image_versions.ARMADA_IMAGE_VERSION
+
+            armada_image = client.images.list(armada_image_tag)
+
             # Pull Armada image if it's not available
             if not armada_image:
-                LOG.info("Downloading Armada image %s ..." % CONF.armada_image_tag)
+                LOG.info("Downloading Armada image %s ..." % armada_image_tag)

                 quay_registry_secret = self._dbapi.service_parameter_get_all(
                     service=constants.SERVICE_TYPE_DOCKER,
@@ -2599,12 +2608,12 @@ class DockerHelper(object):
                 else:
                     quay_registry_auth = None

-                client.images.pull(CONF.armada_image_tag,
+                client.images.pull(armada_image_tag,
                                    auth_config=quay_registry_auth)
-                LOG.info("Armada image %s downloaded!" % CONF.armada_image_tag)
+                LOG.info("Armada image %s downloaded!" % armada_image_tag)

             container = client.containers.run(
-                CONF.armada_image_tag,
+                armada_image_tag,
                 name=ARMADA_CONTAINER_NAME,
                 detach=True,
                 volumes=binds,

View File

@@ -74,6 +74,7 @@ from sysinv.api.controllers.v1 import vim_api
 from sysinv.common import constants
 from sysinv.common import ceph as cceph
 from sysinv.common import exception
+from sysinv.common import image_versions
 from sysinv.common import fm
 from sysinv.common import fernet
 from sysinv.common import health
@@ -102,6 +103,7 @@ from sysinv.puppet import common as puppet_common
 from sysinv.puppet import puppet
 from sysinv.helm import common as helm_common
 from sysinv.helm import helm
+from sysinv.helm import utils as helm_utils

 MANAGER_TOPIC = 'sysinv.conductor_manager'
@@ -213,6 +215,7 @@ class ConductorManager(service.PeriodicService):
         # until host unlock and we need ceph-mon up in order to configure
         # ceph for the initial unlock.
         self._app = kube_app.AppOperator(self.dbapi)
+        self._docker = kube_app.DockerHelper(self.dbapi)
         self._ceph = iceph.CephOperator(self.dbapi)
         self._helm = helm.HelmOperator(self.dbapi)
         self._kube = kubernetes.KubeOperator(self.dbapi)
@@ -224,6 +227,9 @@ class ConductorManager(service.PeriodicService):

         self._handle_restore_in_progress()

+        # Upgrade/Downgrade tiller if required
+        greenthread.spawn(self._upgrade_downgrade_tiller)
+
         LOG.info("sysinv-conductor start committed system=%s" %
                  system.as_dict())
@@ -5118,6 +5124,76 @@ class ConductorManager(service.PeriodicService):
             return
         self.reapply_app(context, app_name)

+    def _upgrade_downgrade_tiller(self):
+        """Check if tiller needs to be upgraded or downgraded"""
+
+        LOG.info("_upgrade_downgrade_tiller")
+
+        FIVE_MIN_IN_SECS = 300
+        try:
+            running_image = self._kube.kube_get_image_by_selector(
+                image_versions.TILLER_SELECTOR_NAME,
+                helm_common.HELM_NS_KUBE_SYSTEM,
+                image_versions.TILLER_CONTAINER_NAME)
+
+            if running_image is None:
+                LOG.warning("Failed to get tiller image")
+                return
+
+            LOG.info("Running tiller image: %s" % running_image)
+
+            # Grab the version from the image name. Version is preceded
+            # by a ":" e.g.
+            # gcr.io/kubernetes-helm/tiller:v2.13.0
+            running_image_name = running_image.split(":")[0]
+            running_version = running_image.split(":")[1]
+
+            if not running_version:
+                LOG.warning("Failed to get version from tiller image")
+                return
+
+            # Verify the tiller version running
+            if running_version != image_versions.TILLER_IMAGE_VERSION:
+                LOG.info("Running version of tiller does not match patching version of %s. "
+                         "Upgrade in progress."
+                         % image_versions.TILLER_IMAGE_VERSION)
+
+                download_image = running_image_name + ":" + image_versions.TILLER_IMAGE_VERSION
+
+                local_registry_auth = kube_app.get_local_docker_registry_auth()
+                self._docker._retrieve_specified_registries()
+
+                # download the image, retry if it fails
+                while True:
+                    try:
+                        ret = self._docker.download_an_image("helm",
+                                                             local_registry_auth,
+                                                             download_image)
+                        if not ret:
+                            raise Exception
+                    except Exception as e:
+                        LOG.warning(
+                            "Failed to download image '%s'. %s" %
+                            (download_image, e))
+                        greenthread.sleep(FIVE_MIN_IN_SECS)
+                        continue
+                    break

+                # reset the cached registries
+                self._docker._reset_registries_info()
+
+                # Update the new image, retry if it fails
+                while True:
+                    try:
+                        helm_utils.helm_upgrade_tiller(download_image)
+                    except Exception as e:
+                        LOG.warning("Failed to update the new image: %s" % e)
+                        greenthread.sleep(FIVE_MIN_IN_SECS)
+                        continue
+                    break
+
+        except Exception as e:
+            LOG.error("{}. Failed to upgrade/downgrade tiller.".format(e))
+
     def check_nodes_stable(self):
         hosts = self.dbapi.ihost_get_list()
         if (utils.is_host_simplex_controller(hosts[0]) and

View File

@@ -128,3 +128,31 @@ def get_openstack_pending_install_charts():
     except Exception as e:
         raise exception.HelmTillerFailure(
             reason="Failed to obtain pending charts list: %s" % e)
+
+
+def helm_upgrade_tiller(image):
+    LOG.info("Attempt to update image to %s" % image)
+    try:
+        # Adding temporary workaround using helm init command with
+        # sed command until helm and tiller provide a fix for
+        # https://github.com/helm/helm/issues/6374
+        workaround = '| sed "s@apiVersion: extensions/v1beta1@apiVersion: apps/v1@" ' \
+                     '| kubectl --kubeconfig /etc/kubernetes/admin.conf apply -f -'
+
+        cmd = '{} {} {}'.format(
+            'helm init --upgrade --kubeconfig /etc/kubernetes/admin.conf --tiller-image',
+            image,
+            workaround)
+
+        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
+        out, err = proc.communicate()
+
+        if err:
+            raise exception.HelmTillerFailure(
+                reason="Failed to upgrade/downgrade image: %s" % err)
+
+        LOG.info("Image was updated to %s" % image)
+
+    except Exception as e:
+        raise exception.HelmTillerFailure(
+            reason="Failed to upgrade/downgrade image: %s" % e)