Tiller upgrade prep

This commit spawns a new greenthread inside the _start() routine of
the conductor. The thread calls the new _upgrade_downgrade_tiller()
method, which checks whether the tiller image version has changed and,
if so, upgrades or downgrades the image. If downloading or applying
the new image fails, the code sleeps for 5 minutes and tries again.

Removes references to the Armada image name from sysinv.conf. Image
versions are now stored in sysinv/common/image_versions.py.

Story: 2006590
Task: 36725
Signed-off-by: Kristine Bujold <kristine.bujold@windriver.com>
Change-Id: I3b3037fef25d2b37c98c4dd7e82b405b41a45a23
Kristine Bujold 2019-09-24 12:47:46 -04:00
parent 8f648c2a36
commit 18d22a950f
6 changed files with 160 additions and 17 deletions
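In outline, the spawn-and-retry flow described above reduces to the
sketch below (eventlet is the library sysinv uses; needs_change and
do_change are hypothetical stand-ins for the real version check and
upgrade hooks, not sysinv functions):

from eventlet import greenthread

FIVE_MIN_IN_SECS = 300

def upgrade_downgrade_with_retry(needs_change, do_change):
    # Nothing to do when the running version already matches.
    if not needs_change():
        return
    while True:
        try:
            do_change()
            break  # success, stop retrying
        except Exception:
            # On any download/apply failure, back off five minutes
            # and try again, as the commit message describes.
            greenthread.sleep(FIVE_MIN_IN_SECS)

# Pass the callable itself to spawn(); invoking it inline would run
# the work in the caller and hand spawn() only the return value.
gt = greenthread.spawn(upgrade_downgrade_with_retry,
                       lambda: True, lambda: None)
gt.wait()  # the conductor just keeps running; waiting here is demo-only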


@@ -1,2 +1,2 @@
 SRC_DIR="sysinv"
-TIS_PATCH_VER=332
+TIS_PATCH_VER=333

sysinv/common/image_versions.py (new file)

@@ -0,0 +1,13 @@
+#
+# Copyright (c) 2019 Wind River Systems, Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+TILLER_CONTAINER_NAME = 'tiller'
+TILLER_SELECTOR_NAME = 'tiller'
+TILLER_IMAGE_NAME = '/kubernetes-helm/tiller'
+TILLER_IMAGE_VERSION = "v2.13.1"
+
+ARMADA_IMAGE_NAME = '/airshipit/armada'
+ARMADA_IMAGE_VERSION = "8a1638098f88d92bf799ef4934abe569789b885e-ubuntu_bionic"
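These constants are only fragments on purpose: as the kube_app.py hunk
further down shows, the conductor prefixes them with a registry URL
(quay.io by default, or an operator-configured mirror) to form the
pullable tag. A minimal illustration:

# How the fragments combine; "quay.io" stands in for the
# service-parameter lookup that can point at a private mirror.
ARMADA_IMAGE_NAME = '/airshipit/armada'
ARMADA_IMAGE_VERSION = "8a1638098f88d92bf799ef4934abe569789b885e-ubuntu_bionic"
quay_url = "quay.io"
armada_image_tag = quay_url + ARMADA_IMAGE_NAME + ":" + ARMADA_IMAGE_VERSION
# -> quay.io/airshipit/armada:8a1638098f88d92bf799ef4934abe569789b885e-ubuntu_bionic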

sysinv/common/kubernetes.py

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2013-2018 Wind River Systems, Inc.
+# Copyright (c) 2013-2019 Wind River Systems, Inc.
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -84,6 +84,23 @@ class KubeOperator(object):
             LOG.error("Kubernetes exception in kube_get_nodes: %s" % e)
             raise
 
+    def kube_get_image_by_selector(self, template_name, namespace, container_name):
+        LOG.debug("kube_get_image_by_selector template_name=%s, namespace=%s" %
+                  (template_name, namespace))
+        try:
+            # Retrieve the named pod.
+            api_response = self._get_kubernetesclient_core().list_namespaced_pod(
+                namespace, label_selector="name=%s" % template_name)
+            for pod in api_response.items:
+                if template_name in pod.metadata.name:
+                    for container in pod.spec.containers:
+                        if container.name == container_name:
+                            return container.image
+            return None
+        except ApiException as e:
+            LOG.error("Kubernetes exception in list_namespaced_pod: %s" % e)
+            raise
+
     def kube_create_namespace(self, namespace):
         body = {'metadata': {'name': namespace}}

sysinv/conductor/kube_app.py

@@ -32,12 +32,12 @@ from eventlet import queue
 from eventlet import Timeout
 from fm_api import constants as fm_constants
 from fm_api import fm_api
-from oslo_config import cfg
 from oslo_log import log as logging
 from sysinv.api.controllers.v1 import kube_app
 from sysinv.common import constants
 from sysinv.common import exception
 from sysinv.common import kubernetes
+from sysinv.common import image_versions
 from sysinv.common import utils as cutils
 from sysinv.common.storage_backend_conf import K8RbdProvisioner
 from sysinv.conductor import openstack
@@ -49,15 +49,6 @@ from sysinv.openstack.common.gettextutils import _
 
 # Log and config
 LOG = logging.getLogger(__name__)
 
-kube_app_opts = [
-    cfg.StrOpt('armada_image_tag',
-               default=('quay.io/airshipit/armada:'
-                        '8a1638098f88d92bf799ef4934abe569789b885e-ubuntu_bionic'),
-               help='Docker image tag of Armada.'),
-]
-
-CONF = cfg.CONF
-CONF.register_opts(kube_app_opts)
 
 # Constants
 APPLY_SEARCH_PATTERN = 'Processing Chart,'
@@ -2532,6 +2523,9 @@ class DockerHelper(object):
                 if registry:
                     return registry + '/' + img_name, registry_auth
                 return pub_img_tag, registry_auth
+            elif registry_name == registry_info['registry_replaced']:
+                registry_auth = registry_info['registry_auth']
+                return pub_img_tag, registry_auth
 
         # If extracted registry_name is none of k8s.gcr.io, gcr.io,
         # quay.io and docker.io or no registry_name specified in image
@@ -2584,10 +2578,25 @@ class DockerHelper(object):
                  overrides_dir: {'bind': '/overrides', 'mode': 'ro'},
                  logs_dir: {'bind': ARMADA_CONTAINER_LOG_LOCATION, 'mode': 'rw'}}
 
-        armada_image = client.images.list(CONF.armada_image_tag)
+        quay_registry_url = self._dbapi.service_parameter_get_all(
+            service=constants.SERVICE_TYPE_DOCKER,
+            section=constants.SERVICE_PARAM_SECTION_DOCKER_QUAY_REGISTRY,
+            name=constants.SERVICE_PARAM_NAME_DOCKER_URL)
+
+        if quay_registry_url:
+            quay_url = quay_registry_url[0].value
+        else:
+            quay_url = constants.DEFAULT_DOCKER_QUAY_REGISTRY
+
+        armada_image_tag = quay_url + \
+            image_versions.ARMADA_IMAGE_NAME + ":" + \
+            image_versions.ARMADA_IMAGE_VERSION
+
+        armada_image = client.images.list(armada_image_tag)
+
         # Pull Armada image if it's not available
         if not armada_image:
-            LOG.info("Downloading Armada image %s ..." % CONF.armada_image_tag)
+            LOG.info("Downloading Armada image %s ..." % armada_image_tag)
 
             quay_registry_secret = self._dbapi.service_parameter_get_all(
                 service=constants.SERVICE_TYPE_DOCKER,
@@ -2599,12 +2608,12 @@ class DockerHelper(object):
             else:
                 quay_registry_auth = None
 
-            client.images.pull(CONF.armada_image_tag,
+            client.images.pull(armada_image_tag,
                                auth_config=quay_registry_auth)
-            LOG.info("Armada image %s downloaded!" % CONF.armada_image_tag)
+            LOG.info("Armada image %s downloaded!" % armada_image_tag)
 
         container = client.containers.run(
-            CONF.armada_image_tag,
+            armada_image_tag,
             name=ARMADA_CONTAINER_NAME,
             detach=True,
             volumes=binds,

sysinv/conductor/manager.py

@@ -74,6 +74,7 @@ from sysinv.api.controllers.v1 import vim_api
 from sysinv.common import constants
 from sysinv.common import ceph as cceph
 from sysinv.common import exception
+from sysinv.common import image_versions
 from sysinv.common import fm
 from sysinv.common import fernet
 from sysinv.common import health
@@ -102,6 +103,7 @@ from sysinv.puppet import common as puppet_common
 from sysinv.puppet import puppet
 from sysinv.helm import common as helm_common
 from sysinv.helm import helm
+from sysinv.helm import utils as helm_utils
 
 
 MANAGER_TOPIC = 'sysinv.conductor_manager'
@@ -213,6 +215,7 @@ class ConductorManager(service.PeriodicService):
         # until host unlock and we need ceph-mon up in order to configure
         # ceph for the initial unlock.
         self._app = kube_app.AppOperator(self.dbapi)
+        self._docker = kube_app.DockerHelper(self.dbapi)
         self._ceph = iceph.CephOperator(self.dbapi)
         self._helm = helm.HelmOperator(self.dbapi)
         self._kube = kubernetes.KubeOperator(self.dbapi)
@@ -224,6 +227,9 @@ class ConductorManager(service.PeriodicService):
         self._handle_restore_in_progress()
 
+        # Upgrade/Downgrade tiller if required
+        greenthread.spawn(self._upgrade_downgrade_tiller)
+
         LOG.info("sysinv-conductor start committed system=%s" %
                  system.as_dict())
@@ -5118,6 +5124,76 @@ class ConductorManager(service.PeriodicService):
             return
 
         self.reapply_app(context, app_name)
 
+    def _upgrade_downgrade_tiller(self):
+        """Check if tiller needs to be upgraded or downgraded"""
+        LOG.info("_upgrade_downgrade_tiller")
+
+        FIVE_MIN_IN_SECS = 300
+        try:
+            running_image = self._kube.kube_get_image_by_selector(
+                image_versions.TILLER_SELECTOR_NAME,
+                helm_common.HELM_NS_KUBE_SYSTEM,
+                image_versions.TILLER_CONTAINER_NAME)
+
+            if running_image is None:
+                LOG.warning("Failed to get tiller image")
+                return
+
+            LOG.info("Running tiller image: %s" % running_image)
+
+            # Grab the version from the image name. The version is
+            # preceded by a ":", e.g.
+            # gcr.io/kubernetes-helm/tiller:v2.13.0
+            running_image_name = running_image.split(":")[0]
+            running_version = running_image.split(":")[1]
+
+            if not running_version:
+                LOG.warning("Failed to get version from tiller image")
+                return
+
+            # Verify the tiller version running
+            if running_version != image_versions.TILLER_IMAGE_VERSION:
+                LOG.info("Running version of tiller does not match the "
+                         "patching version of %s. Upgrade in progress."
+                         % image_versions.TILLER_IMAGE_VERSION)
+
+                download_image = running_image_name + ":" + \
+                    image_versions.TILLER_IMAGE_VERSION
+
+                local_registry_auth = \
+                    kube_app.get_local_docker_registry_auth()
+                self._docker._retrieve_specified_registries()
+
+                # Download the image, retry if it fails
+                while True:
+                    try:
+                        ret = self._docker.download_an_image(
+                            "helm", local_registry_auth, download_image)
+                        if not ret:
+                            raise Exception
+                    except Exception as e:
+                        LOG.warning(
+                            "Failed to download image '%s'. %s" %
+                            (download_image, e))
+                        greenthread.sleep(FIVE_MIN_IN_SECS)
+                        continue
+                    break
+
+                # Reset the cached registries
+                self._docker._reset_registries_info()
+
+                # Update to the new image, retry if it fails
+                while True:
+                    try:
+                        helm_utils.helm_upgrade_tiller(download_image)
+                    except Exception as e:
+                        LOG.warning("Failed to update the new image: %s" % e)
+                        greenthread.sleep(FIVE_MIN_IN_SECS)
+                        continue
+                    break
+
+        except Exception as e:
+            LOG.error("{}. Failed to upgrade/downgrade tiller.".format(e))
+
     def check_nodes_stable(self):
         hosts = self.dbapi.ihost_get_list()
         if (utils.is_host_simplex_controller(hosts[0]) and

sysinv/helm/utils.py

@@ -128,3 +128,31 @@ def get_openstack_pending_install_charts():
     except Exception as e:
         raise exception.HelmTillerFailure(
             reason="Failed to obtain pending charts list: %s" % e)
+
+
+def helm_upgrade_tiller(image):
+    LOG.info("Attempt to update image to %s" % image)
+    try:
+        # Adding a temporary workaround that renders the helm init
+        # output and pipes it through sed, until helm and tiller
+        # provide a fix for https://github.com/helm/helm/issues/6374
+        workaround = '--output yaml ' \
+            '| sed "s@apiVersion: extensions/v1beta1@apiVersion: apps/v1@" ' \
+            '| kubectl --kubeconfig /etc/kubernetes/admin.conf apply -f -'
+
+        cmd = '{} {} {}'.format(
+            'helm init --upgrade --kubeconfig /etc/kubernetes/admin.conf '
+            '--tiller-image',
+            image,
+            workaround)
+
+        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE, shell=True)
+        out, err = proc.communicate()
+        if proc.returncode != 0:
+            raise exception.HelmTillerFailure(
+                reason="Failed to upgrade/downgrade image: %s" % err)
+        LOG.info("Image was updated to %s" % image)
+    except Exception as e:
+        raise exception.HelmTillerFailure(
+            reason="Failed to upgrade/downgrade image: %s" % e)
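For context: helm 2's init renders tiller's Deployment with apiVersion
extensions/v1beta1, which Kubernetes 1.16 removed (the linked helm
issue 6374). Rendering the manifest with --output yaml, rewriting the
apiVersion with sed, and applying it via kubectl works around that.
Assuming image is gcr.io/kubernetes-helm/tiller:v2.13.1, the assembled
command string handed to Popen is:

helm init --upgrade --kubeconfig /etc/kubernetes/admin.conf --tiller-image gcr.io/kubernetes-helm/tiller:v2.13.1 --output yaml | sed "s@apiVersion: extensions/v1beta1@apiVersion: apps/v1@" | kubectl --kubeconfig /etc/kubernetes/admin.conf apply -f -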