Revert "Refactor upgrade re-tries on failure path"

This reverts commit 50f2d8c5aa.

In cases where this retry is triggered, the sysinv-conductor does not complete its startup, which causes the ansible bootstrap to fail with an RPC error.

Change-Id: I062d1e85e71abb66146b66f4bcb627f814cbbc42
This commit is contained in:
Al Bailey 2019-10-18 22:45:51 +00:00
parent 50f2d8c5aa
commit 60bc70a668
5 changed files with 30 additions and 62 deletions

View File

@ -1,2 +1,2 @@
SRC_DIR="sysinv" SRC_DIR="sysinv"
TIS_PATCH_VER=337 TIS_PATCH_VER=336

View File

@ -128,9 +128,6 @@ conductor_opts = [
cfg.IntOpt('managed_app_auto_recovery_interval', cfg.IntOpt('managed_app_auto_recovery_interval',
default=300, default=300,
help='Interval to run managed app auto recovery'), help='Interval to run managed app auto recovery'),
cfg.IntOpt('kube_upgrade_downgrade_retry_interval',
default=3600,
help='Interval in seconds between retries to upgrade/downgrade kubernetes components'),
] ]
CONF = cfg.CONF CONF = cfg.CONF
@ -5160,8 +5157,6 @@ class ConductorManager(service.PeriodicService):
self._upgrade_downgrade_tiller() self._upgrade_downgrade_tiller()
self._upgrade_downgrade_kube_networking() self._upgrade_downgrade_kube_networking()
@retry(retry_on_result=lambda x: x is False,
wait_fixed=(CONF.conductor.kube_upgrade_downgrade_retry_interval * 1000))
def _upgrade_downgrade_tiller(self): def _upgrade_downgrade_tiller(self):
"""Check if tiller needs to be upgraded or downgraded""" """Check if tiller needs to be upgraded or downgraded"""
LOG.info("_upgrade_downgrade_tiller") LOG.info("_upgrade_downgrade_tiller")
@ -5202,10 +5197,9 @@ class ConductorManager(service.PeriodicService):
if running_image is None: if running_image is None:
LOG.warning("Failed to get tiller image") LOG.warning("Failed to get tiller image")
return False return
LOG.info("Running tiller image: %s" % running_image) LOG.info("Running tiller image: %s" % running_image)
LOG.info("Requested tiller version: %s" % image_versions.TILLER_IMAGE_VERSION)
# Grab the version from the image name. Version is preceded # Grab the version from the image name. Version is preceded
# by a ":" e.g. # by a ":" e.g.
@ -5213,7 +5207,7 @@ class ConductorManager(service.PeriodicService):
running_image_name, running_version = running_image.rsplit(":", 1) running_image_name, running_version = running_image.rsplit(":", 1)
if not running_version: if not running_version:
LOG.warning("Failed to get version from tiller image") LOG.warning("Failed to get version from tiller image")
return False return
# Verify the tiller version running # Verify the tiller version running
if running_version != image_versions.TILLER_IMAGE_VERSION: if running_version != image_versions.TILLER_IMAGE_VERSION:
@ -5225,36 +5219,39 @@ class ConductorManager(service.PeriodicService):
local_registry_auth = cutils.get_local_docker_registry_auth() local_registry_auth = cutils.get_local_docker_registry_auth()
self._docker._retrieve_specified_registries() self._docker._retrieve_specified_registries()
# download the image # download the image, retry if it fails
try: while True:
img_tag, ret = self._docker.download_an_image("helm", try:
local_registry_auth, ret = self._docker.download_an_image("helm",
download_image) local_registry_auth,
if not ret: download_image)
raise Exception if not ret:
except Exception as e: raise Exception
LOG.warning("Failed to download image '%s'. %s" % (download_image, e)) except Exception as e:
return False LOG.warning(
"Failed to download image '%s'. %s" %
(download_image, e))
greenthread.sleep(FIVE_MIN_IN_SECS)
continue
break
# reset the cached registries # reset the cached registries
self._docker._reset_registries_info() self._docker._reset_registries_info()
# Update the new image # Update the new image, retry if it fails
try: while True:
helm_utils.helm_upgrade_tiller(download_image) try:
helm_utils.helm_upgrade_tiller(download_image)
except Exception as e: except Exception as e:
LOG.warning("Failed to update the new image: %s" % e) LOG.warning("Failed to update the new image: %s" % e)
return False greenthread.sleep(FIVE_MIN_IN_SECS)
continue
break
except Exception as e: except Exception as e:
LOG.error("{}. Failed to upgrade/downgrade tiller.".format(e)) LOG.error("{}. Failed to upgrade/downgrade tiller.".format(e))
return False
return True
@retry(retry_on_result=lambda x: x is False,
wait_fixed=(CONF.conductor.kube_upgrade_downgrade_retry_interval * 1000))
def _upgrade_downgrade_kube_networking(self): def _upgrade_downgrade_kube_networking(self):
try: try:
LOG.info( LOG.info(
@ -5274,9 +5271,6 @@ class ConductorManager(service.PeriodicService):
except Exception as e: except Exception as e:
LOG.error("Failed to upgrade/downgrade kubernetes " LOG.error("Failed to upgrade/downgrade kubernetes "
"networking images: {}".format(e)) "networking images: {}".format(e))
return False
return True
def check_nodes_stable(self): def check_nodes_stable(self):
hosts = self.dbapi.ihost_get_list() hosts = self.dbapi.ihost_get_list()

View File

@ -2,7 +2,7 @@
# -*- encoding: utf-8 -*- # -*- encoding: utf-8 -*-
# #
# #
# Copyright (c) 2017-2019 Wind River Systems, Inc. # Copyright (c) 2017-2018 Wind River Systems, Inc.
# #
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
@ -546,9 +546,6 @@ class StorageTierDependentTCs(base.FunctionalTest):
set_monitors_status_patcher = mock.patch.object(ceph_utils.CephApiOperator, 'get_monitors_status') set_monitors_status_patcher = mock.patch.object(ceph_utils.CephApiOperator, 'get_monitors_status')
set_is_initial_config_patcher = mock.patch.object(cutils, 'is_initial_config_complete') set_is_initial_config_patcher = mock.patch.object(cutils, 'is_initial_config_complete')
upgrade_downgrade_kube_components_patcher = mock.patch.object(
manager.ConductorManager, '_upgrade_downgrade_kube_components')
def setUp(self): def setUp(self):
super(StorageTierDependentTCs, self).setUp() super(StorageTierDependentTCs, self).setUp()
self.mock_set_crushmap = self.set_crushmap_patcher.start() self.mock_set_crushmap = self.set_crushmap_patcher.start()
@ -566,14 +563,11 @@ class StorageTierDependentTCs(base.FunctionalTest):
self.host_index = -1 self.host_index = -1
self.mon_index = -1 self.mon_index = -1
self.mock_upgrade_downgrade_kube_components = self.upgrade_downgrade_kube_components_patcher.start()
def tearDown(self): def tearDown(self):
super(StorageTierDependentTCs, self).tearDown() super(StorageTierDependentTCs, self).tearDown()
self.set_crushmap_patcher.stop() self.set_crushmap_patcher.stop()
self.set_monitors_status_patcher = self.set_monitors_status_patcher.stop() self.set_monitors_status_patcher = self.set_monitors_status_patcher.stop()
self.set_is_initial_config_patcher.stop() self.set_is_initial_config_patcher.stop()
self.upgrade_downgrade_kube_components_patcher.stop()
def assertDeleted(self, fullPath): def assertDeleted(self, fullPath):
self.get_json(fullPath, expect_errors=True) # Make sure this line raises an error self.get_json(fullPath, expect_errors=True) # Make sure this line raises an error

View File

@ -1,7 +1,7 @@
# vim: tabstop=4 shiftwidth=4 softtabstop=4 # vim: tabstop=4 shiftwidth=4 softtabstop=4
# coding=utf-8 # coding=utf-8
# Copyright (c) 2017-2019 Wind River Systems, Inc. # Copyright (c) 2017-2018 Wind River Systems, Inc.
# #
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# #
@ -40,9 +40,6 @@ class UpdateCephCluster(base.DbTestCase):
# - test_add_valid_mix_tiers # - test_add_valid_mix_tiers
# - test_add_4_mix_bbbb # - test_add_4_mix_bbbb
upgrade_downgrade_kube_components_patcher = mock.patch.object(
manager.ConductorManager, '_upgrade_downgrade_kube_components')
def setUp(self): def setUp(self):
super(UpdateCephCluster, self).setUp() super(UpdateCephCluster, self).setUp()
self.service = manager.ConductorManager('test-host', 'test-topic') self.service = manager.ConductorManager('test-host', 'test-topic')
@ -53,12 +50,6 @@ class UpdateCephCluster(base.DbTestCase):
self.load = utils.create_test_load() self.load = utils.create_test_load()
self.host_index = -1 self.host_index = -1
self.mock_upgrade_downgrade_kube_components = self.upgrade_downgrade_kube_components_patcher.start()
def tearDown(self):
super(UpdateCephCluster, self).tearDown()
self.upgrade_downgrade_kube_components_patcher.stop()
def _create_storage_ihost(self, hostname): def _create_storage_ihost(self, hostname):
self.host_index += 1 self.host_index += 1
ihost_dict = utils.get_test_ihost( ihost_dict = utils.get_test_ihost(

View File

@ -17,13 +17,11 @@
# License for the specific language governing permissions and limitations # License for the specific language governing permissions and limitations
# under the License. # under the License.
# #
# Copyright (c) 2013-2019 Wind River Systems, Inc. # Copyright (c) 2013-2016 Wind River Systems, Inc.
# #
"""Test class for Sysinv ManagerService.""" """Test class for Sysinv ManagerService."""
import mock
from sysinv.common import exception from sysinv.common import exception
from sysinv.conductor import manager from sysinv.conductor import manager
from sysinv.db import api as dbapi from sysinv.db import api as dbapi
@ -34,9 +32,6 @@ from sysinv.tests.db import utils
class ManagerTestCase(base.DbTestCase): class ManagerTestCase(base.DbTestCase):
upgrade_downgrade_kube_components_patcher = mock.patch.object(
manager.ConductorManager, '_upgrade_downgrade_kube_components')
def setUp(self): def setUp(self):
super(ManagerTestCase, self).setUp() super(ManagerTestCase, self).setUp()
self.service = manager.ConductorManager('test-host', 'test-topic') self.service = manager.ConductorManager('test-host', 'test-topic')
@ -46,12 +41,6 @@ class ManagerTestCase(base.DbTestCase):
self.system = utils.create_test_isystem() self.system = utils.create_test_isystem()
self.load = utils.create_test_load() self.load = utils.create_test_load()
self.mock_upgrade_downgrade_kube_components = self.upgrade_downgrade_kube_components_patcher.start()
def tearDown(self):
super(ManagerTestCase, self).tearDown()
self.upgrade_downgrade_kube_components_patcher.stop()
def _create_test_ihost(self, **kwargs): def _create_test_ihost(self, **kwargs):
# ensure the system ID for proper association # ensure the system ID for proper association
kwargs['forisystemid'] = self.system['id'] kwargs['forisystemid'] = self.system['id']