Update upgrade code for removing Ceph Cache Tiering

Story: 2002884
Task: 22846
Change-Id: Ia2207eecea4fe4e590f774764a8beea83fa15fa7
Signed-off-by: Don Penney <don.penney@windriver.com>
Signed-off-by: Jack Ding <jack.ding@windriver.com>
Signed-off-by: Scott Little <scott.little@windriver.com>
2018-05-21 09:10:15 -05:00 · 2018-05-21 09:10:15 -05:00 · 2376458552
parent c5aa54e37a
commit 2376458552
3 changed files with 23 additions and 30 deletions
--- a/ceph/ceph-manager/ceph-manager/ceph_manager/constants.py
+++ b/ceph/ceph-manager/ceph-manager/ceph_manager/constants.py
@@ -65,7 +65,7 @@ CEPH_MANAGER_TOPIC = 'sysinv.ceph_manager'
 SYSINV_CONFIG_FILE = '/etc/sysinv/sysinv.conf'

 # Titanium Cloud version strings
-TITANIUM_SERVER_VERSION_16_10 = '16.10'
+TITANIUM_SERVER_VERSION_18_03 = '18.03'

 CEPH_HEALTH_WARN_REQUIRE_JEWEL_OSDS_NOT_SET = (
    "all OSDs are running jewel or later but the "
--- a/ceph/ceph-manager/ceph-manager/ceph_manager/monitor.py
+++ b/ceph/ceph-manager/ceph-manager/ceph_manager/monitor.py
@@ -22,20 +22,18 @@ import exception
 LOG = logging.getLogger(__name__)


-# When upgrading from 16.10 to 17.x Ceph goes from Hammer release
-# to Jewel release. After all storage nodes are upgraded to 17.x
-# the cluster is in HEALTH_WARN until administrator explicitly
-# enables require_jewel_osds flag - which signals Ceph that it
-# can safely transition from Hammer to Jewel
+# In 18.03 (R5), ceph cache tiering was disabled and prevented from being
+# re-enabled. When upgrading from 18.03 (R5) to R6 we need to remove the
+# cache-tier (ceph-cache-tiering) from the crushmap.
 #
-# This class is needed only when upgrading from 16.10 to 17.x
-# TODO: remove it after 1st 17.x release
+# This class is needed only when upgrading from R5 to R6
+# TODO: remove it after 1st R6 release
 #
 class HandleUpgradesMixin(object):

    def __init__(self, service):
        self.service = service
-        self.surpress_require_jewel_osds_warning = False
+        self.wait_for_upgrade_complete = False

    def setup(self, config):
        self._set_upgrade(self.service.retry_get_software_upgrade_status())
@@ -45,9 +43,10 @@ class HandleUpgradesMixin(object):
        from_version = upgrade.get('from_version')
        if (state
                and state != constants.UPGRADE_COMPLETED
-                and from_version == constants.TITANIUM_SERVER_VERSION_16_10):
-            LOG.info(_LI("Surpress require_jewel_osds health warning"))
-            self.surpress_require_jewel_osds_warning = True
+                and from_version == constants.TITANIUM_SERVER_VERSION_18_03):
+
+            LOG.info(_LI("Wait for caph upgrade to complete before monitoring cluster."))
+            self.wait_for_upgrade_complete = True

    def set_flag_require_jewel_osds(self):
        try:
@@ -73,7 +72,7 @@ class HandleUpgradesMixin(object):
        health = self.auto_heal(health)
        # filter out require_jewel_osds warning
        #
-        if not self.surpress_require_jewel_osds_warning:
+        if not self.wait_for_upgrade_complete:
            return health
        if health['health'] != constants.CEPH_HEALTH_WARN:
            return health
@@ -114,17 +113,16 @@ class HandleUpgradesMixin(object):
            state = upgrade.get('state')
            # surpress require_jewel_osds in case upgrade is
            # in progress but not completed or aborting
-            if (not self.surpress_require_jewel_osds_warning
+            if (not self.wait_for_upgrade_complete
                    and (upgrade.get('from_version')
-                         == constants.TITANIUM_SERVER_VERSION_16_10)
+                         == constants.TITANIUM_SERVER_VERSION_18_03)
                    and state not in [
                        None,
                        constants.UPGRADE_COMPLETED,
                        constants.UPGRADE_ABORTING,
                        constants.UPGRADE_ABORT_COMPLETING,
                        constants.UPGRADE_ABORTING_ROLLBACK]):
-                LOG.info(_LI("Surpress require_jewel_osds health warning"))
-                self.surpress_require_jewel_osds_warning = True
+                self.wait_for_upgrade_complete = True
            # set require_jewel_osds in case upgrade is
            # not in progress or completed
            if (state in [None, constants.UPGRADE_COMPLETED]):
@@ -135,16 +133,14 @@ class HandleUpgradesMixin(object):
                self.set_flag_require_jewel_osds()
                health = self._remove_require_jewel_osds_warning(health)
                LOG.info(_LI("Unsurpress require_jewel_osds health warning"))
-                self.surpress_require_jewel_osds_warning = False
+                self.wait_for_upgrade_complete = False
            # unsurpress require_jewel_osds in case upgrade
            # is aborting
-            if (self.surpress_require_jewel_osds_warning
-                    and state in [
-                        constants.UPGRADE_ABORTING,
-                        constants.UPGRADE_ABORT_COMPLETING,
-                        constants.UPGRADE_ABORTING_ROLLBACK]):
-                LOG.info(_LI("Unsurpress require_jewel_osds health warning"))
-                self.surpress_require_jewel_osds_warning = False
+            if (state in [
+                       constants.UPGRADE_ABORTING,
+                       constants.UPGRADE_ABORT_COMPLETING,
+                       constants.UPGRADE_ABORTING_ROLLBACK]):
+                self.wait_for_upgrade_complete = False
        return health


--- a/ceph/ceph-manager/ceph-manager/ceph_manager/server.py
+++ b/ceph/ceph-manager/ceph-manager/ceph_manager/server.py
@@ -97,9 +97,6 @@ class RpcEndpoint(PeriodicTasks):
        return self.service.monitor.cluster_is_up


-# This class is needed only when upgrading from 16.10 to 17.x
-# TODO:  remove it after 1st 17.x release
-#
 class SysinvConductorUpgradeApi(object):
    def __init__(self):
        self.sysinv_conductor = None
@@ -113,10 +110,10 @@ class SysinvConductorUpgradeApi(object):
        return upgrade

    @retry(wait_fixed=1000,
-           retry_on_exception=lambda exception:
+           retry_on_exception=lambda e:
               LOG.warn(_LW(
                   "Getting software upgrade status failed "
-                   "with: %s. Retrying... ") % str(exception)) or True)
+                   "with: %s. Retrying... ") % str(e)) or True)
    def retry_get_software_upgrade_status(self):
        return self.get_software_upgrade_status()