Ceph for standard: Semantic checks

In order to enable OpenStack's Helm charts on StarlingX we need
distributed persistent storage for Kubernetes that leverages our
existing storage configurations. For this stage we will enable Ceph's
RBD to work with the Kubernetes RBD provisioner through a new Helm chart.

Since RBD will be the persistent storage solution, Ceph support has to
be extended to the one-node, two-node and Standard configurations.

This commit adds multiple semantic checks to make sure that Ceph on
a standard deployment works as expected.

Change-Id: I31786fa78e8c926a57077bb0a25f1e4cbf543cab
Co-Authored-By: Stefan Dinescu <stefan.dinescu@windriver.com>
Implements: containerization-2002844-CEPH-persistent-storage-backend-for-Kubernetes
Story: 2002844
Task: 28723
Depends-On: https://review.openstack.org/629512
Signed-off-by: Ovidiu Poncea <Ovidiu.Poncea@windriver.com>
Ovidiu Poncea 2019-01-08 16:01:02 +02:00
parent 3c674e0cc7
commit 747595fb0b
10 changed files with 338 additions and 112 deletions

View File

@@ -35,6 +35,7 @@ from sysinv.api.controllers.v1 import controller_fs as controller_fs_utils
from sysinv.api.controllers.v1 import link
from sysinv.api.controllers.v1 import types
from sysinv.api.controllers.v1 import utils
from sysinv.common import ceph
from sysinv.common import constants
from sysinv.common import exception
from sysinv.common import utils as cutils
@@ -406,13 +407,57 @@ def _set_defaults(ceph_mon):
def _create(ceph_mon):
# validate host
try:
chost = pecan.request.dbapi.ihost_get(ceph_mon['ihost_uuid'])
except exception.ServerNotFound:
raise wsme.exc.ClientSideError(
_("Host not found uuid: %s ." % ceph_mon['ihost_uuid']))
ceph_mon['forihostid'] = chost['id']
# check if ceph monitor is already configured
if pecan.request.dbapi.ceph_mon_get_by_ihost(ceph_mon['forihostid']):
raise wsme.exc.ClientSideError(
_("Ceph monitor already configured for host '%s'." % chost['hostname']))
# only one instance of the 3rd ceph monitor is allowed
ceph_mons = pecan.request.dbapi.ceph_mon_get_list()
for mon in ceph_mons:
h = pecan.request.dbapi.ihost_get(mon['forihostid'])
if h.personality in [constants.STORAGE, constants.WORKER]:
raise wsme.exc.ClientSideError(
_("Ceph monitor already configured for host '%s'." % h['hostname']))
# Adding a ceph monitor to a worker selects Ceph's deployment model
if chost['personality'] == constants.WORKER:
# Only if replication model is CONTROLLER or not yet defined
stor_model = ceph.get_ceph_storage_model()
worker_stor_models = [constants.CEPH_CONTROLLER_MODEL, constants.CEPH_UNDEFINED_MODEL]
if stor_model not in worker_stor_models:
raise wsme.exc.ClientSideError(
_("Can not add a storage monitor to a worker if "
"ceph's deployments model is already set to %s." % stor_model))
replication, min_replication = \
StorageBackendConfig.get_ceph_max_replication(pecan.request.dbapi)
supported_replication = constants.CEPH_CONTROLLER_MODEL_REPLICATION_SUPPORTED
if replication not in supported_replication:
raise wsme.exc.ClientSideError(
_("Ceph monitor can be added to a worker only if "
"replication is set to: %s'. Please update replication "
"before configuring a monitor on a worker node." % supported_replication))
# host must be locked and online
if (chost['availability'] != constants.AVAILABILITY_ONLINE or
chost['administrative'] != constants.ADMIN_LOCKED):
raise wsme.exc.ClientSideError(
_("Host %s must be locked and online." % chost['hostname']))
ceph_mon = _set_defaults(ceph_mon)
_check_ceph_mon(ceph_mon)
chost = pecan.request.dbapi.ihost_get(ceph_mon['ihost_uuid'])
ceph_mon['forihostid'] = chost['id']
controller_fs_utils._check_controller_fs(
ceph_mon_gib_new=ceph_mon['ceph_mon_gib'])
@@ -452,12 +497,17 @@ def _create(ceph_mon):
# At this moment the only possibility to add a dynamic monitor
# is on a worker node, so we check for that.
if chost.personality == constants.WORKER:
# Storage nodes are not supported on a controller based
# storage model.
personalities = [constants.CONTROLLER, constants.WORKER]
pecan.request.rpcapi.update_ceph_base_config(
pecan.request.context,
personalities)
try:
# Storage nodes are not supported on a controller based
# storage model.
personalities = [constants.CONTROLLER, constants.WORKER]
pecan.request.rpcapi.update_ceph_base_config(
pecan.request.context,
personalities)
except Exception:
values = {'state': constants.SB_STATE_CONFIG_ERR, 'task': None}
pecan.request.dbapi.ceph_mon_update(new_ceph_mon['uuid'], values)
raise
# The return value needs to be iterable, so make it a list.
return [new_ceph_mon]
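
The checks added above amount to a simple admission rule: a Ceph monitor may be placed on a worker only while the deployment model is still 'controller' or undefined, and only when the configured replication factor is one the controller model supports. A minimal standalone sketch of that rule, using a hypothetical helper name (validate_monitor_on_worker) and constant values copied from this change:

# Minimal sketch of the monitor-on-worker admission rule added above.
# validate_monitor_on_worker is a hypothetical helper, not a sysinv API;
# the constants mirror the values added to sysinv.common.constants.

CEPH_CONTROLLER_MODEL = 'controller'
CEPH_UNDEFINED_MODEL = 'undefined'
CEPH_CONTROLLER_MODEL_REPLICATION_SUPPORTED = [2]


def validate_monitor_on_worker(stor_model, replication):
    """Raise ValueError if a Ceph monitor cannot be added to a worker."""
    if stor_model not in (CEPH_CONTROLLER_MODEL, CEPH_UNDEFINED_MODEL):
        raise ValueError(
            "Cannot add a monitor to a worker: deployment model "
            "is already '%s'." % stor_model)
    if replication not in CEPH_CONTROLLER_MODEL_REPLICATION_SUPPORTED:
        raise ValueError(
            "Cannot add a monitor to a worker: replication %s is not in "
            "%s." % (replication, CEPH_CONTROLLER_MODEL_REPLICATION_SUPPORTED))


validate_monitor_on_worker('undefined', 2)   # accepted
# validate_monitor_on_worker('storage', 2)   # would raise ValueError
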

View File

@@ -2282,11 +2282,8 @@ class HostController(rest.RestController):
"monitor available. At least %s unlocked and "
"enabled hosts with monitors are required. Please"
" ensure hosts with monitors are unlocked and "
"enabled - candidates: %s, %s, %s") %
(num_monitors, constants.MIN_STOR_MONITORS,
constants.CONTROLLER_0_HOSTNAME,
constants.CONTROLLER_1_HOSTNAME,
constants.STORAGE_0_HOSTNAME))
"enabled.") %
(num_monitors, constants.MIN_STOR_MONITORS))
# If it is the last storage node to delete, we need to delete
# ceph osd pools and update additional tier status to "defined"
@@ -3025,15 +3022,23 @@ class HostController(rest.RestController):
patched_ihost['subfunctions'] = subfunctions
elif patched_ihost['personality'] == constants.STORAGE:
# Storage nodes are only allowed if we are configured to use
# ceph for the cinder backend.
# Storage nodes are only allowed if we are configured to use ceph.
if not StorageBackendConfig.has_backend_configured(
pecan.request.dbapi,
constants.CINDER_BACKEND_CEPH
constants.SB_TYPE_CEPH
):
raise wsme.exc.ClientSideError(
_("Storage nodes can only be configured if storage "
"cluster is configured for the cinder backend."))
"cluster is configured for the Ceph backend."))
# Storage nodes are allowed when using the CEPH_STORAGE_MODEL model
stor_model = ceph.get_ceph_storage_model()
if stor_model not in [constants.CEPH_STORAGE_MODEL, constants.CEPH_UNDEFINED_MODEL]:
# Adding storage-0 when storage model is CEPH_UNDEFINED_MODEL will
# set it to CEPH_STORAGE_MODEL.
raise wsme.exc.ClientSideError(
_("Storage nodes can not be configured for "
"the '%s' storage model." % stor_model))
current_storage_ihosts = \
pecan.request.dbapi.ihost_get_by_personality(constants.STORAGE)
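
The mirror-image rule applies to dedicated storage nodes: they are only accepted while the model is 'storage' or still undefined (adding storage-0 while the model is undefined settles it to 'storage'). A minimal sketch, with a hypothetical helper name:

# Sketch of the storage-personality admission rule above.
# storage_node_allowed is a hypothetical helper for illustration only.

CEPH_STORAGE_MODEL = 'storage'
CEPH_UNDEFINED_MODEL = 'undefined'


def storage_node_allowed(stor_model):
    # Adding storage-0 while the model is undefined locks it to 'storage'.
    return stor_model in (CEPH_STORAGE_MODEL, CEPH_UNDEFINED_MODEL)


assert storage_node_allowed('undefined')
assert not storage_node_allowed('controller')
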
@@ -4408,7 +4413,7 @@ class HostController(rest.RestController):
api = pecan.request.dbapi
backend = StorageBackendConfig.get_configuring_backend(api)
if backend and backend.backend == constants.CINDER_BACKEND_CEPH:
if backend and backend.backend == constants.SB_TYPE_CEPH:
ihosts = api.ihost_get_by_personality(
constants.CONTROLLER
)
@@ -4419,9 +4424,14 @@ class HostController(rest.RestController):
# check if customer needs to install storage nodes
if backend.task == constants.SB_TASK_RECONFIG_CONTROLLER:
if HostController._check_provisioned_storage_hosts():
# Storage nodes are provisioned. This means that
# this is not the first time Ceph is configured
stor_model = ceph.get_ceph_storage_model()
if (HostController._check_provisioned_storage_hosts() or
stor_model == constants.CEPH_CONTROLLER_MODEL):
# This means that either:
# 1. Storage nodes are already provisioned (this is not
# the first time Ceph is configured) or
# 2. We are on a standard config and we don't need to
# configure storage nodes at all.
api.storage_backend_update(backend.uuid, {
'state': constants.SB_STATE_CONFIGURED,
'task': None
@@ -4906,7 +4916,8 @@ class HostController(rest.RestController):
subfunctions_set = \
set(hostupdate.ihost_patch[constants.SUBFUNCTIONS].split(','))
if constants.WORKER in subfunctions_set:
if (personality == constants.WORKER or
constants.WORKER in subfunctions_set):
self.check_lock_worker(hostupdate)
hostupdate.notify_vim = True
@@ -4938,20 +4949,38 @@ class HostController(rest.RestController):
if StorageBackendConfig.has_backend_configured(
pecan.request.dbapi,
constants.CINDER_BACKEND_CEPH):
constants.SB_TYPE_CEPH):
query_hosts = None
stor_model = ceph.get_ceph_storage_model()
if stor_model == constants.CEPH_STORAGE_MODEL:
query_hosts = constants.STORAGE
elif stor_model == constants.CEPH_CONTROLLER_MODEL:
query_hosts = constants.CONTROLLER
else:
# If backend type is still undefined it means no storage nodes
# have been configured and no worker monitor has been added,
# so it is safe to not check the quorum.
# Or we are dealing with an AIO-SX.
return
try:
st_nodes = pecan.request.dbapi.ihost_get_by_personality(constants.STORAGE)
st_nodes = pecan.request.dbapi.ihost_get_by_personality(query_hosts)
except exception.NodeNotFound:
# If we don't have any storage nodes we don't need to
# check for quorum. We'll allow the node to be locked.
# We will always have at least one controller, so for
# controllers that also act as storage nodes this should
# never happen.
return
# TODO(oponcea) remove once SM supports in-service config reload
# Allow locking controllers when all storage nodes are locked.
for node in st_nodes:
if (node['administrative'] == constants.ADMIN_UNLOCKED):
break
else:
return
if stor_model == constants.CEPH_STORAGE_MODEL:
for node in st_nodes:
if (node['administrative'] == constants.ADMIN_UNLOCKED):
break
else:
return
if (hostupdate.ihost_orig['administrative'] ==
constants.ADMIN_UNLOCKED and
hostupdate.ihost_orig['operational'] ==
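
The lock-time quorum check now decides which personality to inspect from the deployment model rather than always querying storage nodes. A rough sketch of that selection, with constant values taken from this change and a hypothetical helper name (select_quorum_hosts):

# Sketch of the host-personality selection used by the lock check above.
# select_quorum_hosts is a hypothetical name used only for illustration.

CEPH_STORAGE_MODEL = 'storage'
CEPH_CONTROLLER_MODEL = 'controller'
STORAGE = 'storage'        # host personality
CONTROLLER = 'controller'  # host personality


def select_quorum_hosts(stor_model):
    """Return the personality whose hosts back the Ceph quorum, or None."""
    if stor_model == CEPH_STORAGE_MODEL:
        return STORAGE         # OSDs and the 3rd monitor on storage nodes
    if stor_model == CEPH_CONTROLLER_MODEL:
        return CONTROLLER      # OSDs on controllers
    return None                # undefined or AIO-SX: skip the quorum check


assert select_quorum_hosts('storage') == 'storage'
assert select_quorum_hosts('undefined') is None
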
@@ -4986,11 +5015,8 @@ class HostController(rest.RestController):
"monitor available. At least %s unlocked and "
"enabled hosts with monitors are required. Please"
" ensure hosts with monitors are unlocked and "
"enabled - candidates: %s, %s, %s") %
(num_monitors, constants.MIN_STOR_MONITORS,
constants.CONTROLLER_0_HOSTNAME,
constants.CONTROLLER_1_HOSTNAME,
constants.STORAGE_0_HOSTNAME))
"enabled.") %
(num_monitors, constants.MIN_STOR_MONITORS))
if not force:
# sm-lock-pre-check
@@ -5185,9 +5211,9 @@ class HostController(rest.RestController):
storage_nodes = pecan.request.dbapi.ihost_get_by_personality(
personality=constants.STORAGE)
except Exception:
raise wsme.exc.ClientSideError(
_("Can not unlock a worker node until at "
"least one storage node is unlocked and enabled."))
# We are unlocking worker node when no storage nodes are
# defined. This is ok in CEPH_CONTROLLER_MODEL.
pass
is_storage_host_unlocked = False
if storage_nodes:
for node in storage_nodes:
@@ -5197,8 +5223,9 @@ class HostController(rest.RestController):
is_storage_host_unlocked = True
break
if not is_storage_host_unlocked:
stor_model = ceph.get_ceph_storage_model()
if (not is_storage_host_unlocked and
not stor_model == constants.CEPH_CONTROLLER_MODEL):
raise wsme.exc.ClientSideError(
_("Can not unlock a worker node until at "
"least one storage node is unlocked and enabled."))
@@ -5229,11 +5256,8 @@ class HostController(rest.RestController):
"monitor available. At least %s unlocked and "
"enabled hosts with monitors are required. Please"
" ensure hosts with monitors are unlocked and "
"enabled - candidates: %s, %s, %s") %
(num_monitors, constants.MIN_STOR_MONITORS,
constants.CONTROLLER_0_HOSTNAME,
constants.CONTROLLER_1_HOSTNAME,
constants.STORAGE_0_HOSTNAME))
"enabled.") %
(num_monitors, constants.MIN_STOR_MONITORS))
# Check Ceph configuration, if it is wiped out (in the Backup & Restore
# process) then restore the configuration.
@@ -5528,11 +5552,8 @@ class HostController(rest.RestController):
"monitor available. At least %s unlocked and "
"enabled hosts with monitors are required. Please"
" ensure hosts with monitors are unlocked and "
"enabled - candidates: %s, %s, %s") %
(num_monitors, constants.MIN_STOR_MONITORS,
constants.CONTROLLER_0_HOSTNAME,
constants.CONTROLLER_1_HOSTNAME,
constants.STORAGE_0_HOSTNAME))
"enabled.") %
(num_monitors, constants.MIN_STOR_MONITORS))
storage_nodes = pecan.request.dbapi.ihost_get_by_personality(
constants.STORAGE)
@@ -5607,50 +5628,79 @@ class HostController(rest.RestController):
def check_lock_worker(self, hostupdate, force=False):
"""Pre lock semantic checks for worker"""
hostname = hostupdate.ihost_patch.get('hostname')
LOG.info("%s host check_lock_worker" % hostupdate.displayid)
if force:
LOG.info("Forced lock of host: %s" % hostname)
return
upgrade = None
try:
upgrade = pecan.request.dbapi.software_upgrade_get_one()
except exception.NotFound:
return
upgrade_state = upgrade.state
system = pecan.request.dbapi.isystem_get_one()
system_mode = system.system_mode
system_type = system.system_type
hostname = hostupdate.ihost_patch.get('hostname')
if system_mode == constants.SYSTEM_MODE_SIMPLEX:
return
if upgrade_state in [
constants.UPGRADE_STARTING,
constants.UPGRADE_STARTED,
constants.UPGRADE_DATA_MIGRATION,
constants.UPGRADE_DATA_MIGRATION_COMPLETE,
constants.UPGRADE_DATA_MIGRATION_FAILED]:
if system_type == constants.TIS_AIO_BUILD:
if hostname == constants.CONTROLLER_1_HOSTNAME:
# Allow AIO-DX lock of controller-1
return
raise wsme.exc.ClientSideError(
_("Rejected: Can not lock %s with worker function "
"at this upgrade stage '%s'.") %
(hostupdate.displayid, upgrade_state))
# Check upgrade state for controllers with worker subfunction
subfunctions_set = \
set(hostupdate.ihost_patch[constants.SUBFUNCTIONS].split(','))
if (hostupdate.ihost_orig['personality'] == constants.CONTROLLER and
constants.WORKER in subfunctions_set):
upgrade = None
try:
upgrade = pecan.request.dbapi.software_upgrade_get_one()
upgrade_state = upgrade.state
except exception.NotFound:
upgrade_state = None
if upgrade_state in [constants.UPGRADE_UPGRADING_CONTROLLERS]:
if system_type == constants.TIS_AIO_BUILD:
# Allow lock for AIO-DX controller-0 after upgrading
# controller-1. Allow lock for AIO-DX controllers.
if hostname == constants.CONTROLLER_0_HOSTNAME:
return
raise wsme.exc.ClientSideError(
_("Rejected: Can not lock %s with worker function "
"at this upgrade stage '%s'.") %
(hostupdate.displayid, upgrade_state))
if upgrade_state in [
constants.UPGRADE_STARTING,
constants.UPGRADE_STARTED,
constants.UPGRADE_DATA_MIGRATION,
constants.UPGRADE_DATA_MIGRATION_COMPLETE,
constants.UPGRADE_DATA_MIGRATION_FAILED]:
if system_type == constants.TIS_AIO_BUILD:
if hostname == constants.CONTROLLER_1_HOSTNAME:
# Allow AIO-DX lock of controller-1
return
raise wsme.exc.ClientSideError(
_("Rejected: Can not lock %s with worker function "
"at this upgrade stage '%s'.") %
(hostupdate.displayid, upgrade_state))
if upgrade_state in [constants.UPGRADE_UPGRADING_CONTROLLERS]:
if system_type == constants.TIS_AIO_BUILD:
# Allow lock for AIO-DX controller-0 after upgrading
# controller-1. Allow lock for AIO-DX controllers.
if hostname == constants.CONTROLLER_0_HOSTNAME:
return
raise wsme.exc.ClientSideError(
_("Rejected: Can not lock %s with worker function "
"at this upgrade stage '%s'.") %
(hostupdate.displayid, upgrade_state))
# Worker node with a Ceph Monitor service? Make sure at least
# two monitors will remain up after lock.
host_id = hostupdate.ihost_orig.get('id')
ceph_mon = pecan.request.dbapi.ceph_mon_get_by_ihost(host_id)
if ceph_mon:
if (hostupdate.ihost_orig['personality'] ==
constants.WORKER and
hostupdate.ihost_orig['administrative'] ==
constants.ADMIN_UNLOCKED and
hostupdate.ihost_orig['operational'] ==
constants.OPERATIONAL_ENABLED):
num_monitors, required_monitors, quorum_names = \
self._ceph.get_monitors_status(pecan.request.dbapi)
if (hostname in quorum_names and
num_monitors - 1 < required_monitors):
raise wsme.exc.ClientSideError(_(
"Only %d Ceph "
"monitors available. At least %s unlocked and "
"enabled hosts with monitors are required. "
"Please ensure hosts with monitors are "
"unlocked and enabled.") %
(num_monitors, constants.MIN_STOR_MONITORS))
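
The monitor check in check_lock_worker boils down to one piece of arithmetic: locking a host whose monitor is in the quorum must still leave at least MIN_STOR_MONITORS active monitors. A small sketch, assuming the hypothetical helper name may_lock_monitor_host:

# Sketch of the lock-time monitor quorum check above.
# may_lock_monitor_host is a hypothetical helper for illustration.

MIN_STOR_MONITORS = 2   # value added to sysinv.common.constants


def may_lock_monitor_host(hostname, num_monitors, quorum_names,
                          required_monitors=MIN_STOR_MONITORS):
    """True if locking `hostname` still leaves enough active monitors."""
    if hostname not in quorum_names:
        return True                        # host's monitor is not in quorum
    return (num_monitors - 1) >= required_monitors


# Locking worker-0 with 3 active monitors is fine; with only 2 it is not.
assert may_lock_monitor_host(
    'worker-0', 3, ['controller-0', 'controller-1', 'worker-0'])
assert not may_lock_monitor_host(
    'worker-0', 2, ['controller-0', 'worker-0'])
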
def check_unlock_interfaces(self, hostupdate):
"""Semantic check for interfaces on host-unlock."""
@@ -6112,6 +6162,8 @@ class HostController(rest.RestController):
LOG.info("%s _handle_unlock_action" % hostupdate.displayid)
if hostupdate.ihost_patch.get('personality') == constants.STORAGE:
self._handle_unlock_storage_host(hostupdate)
elif hostupdate.ihost_patch.get('personality') == constants.WORKER:
self._handle_unlock_worker_host(hostupdate)
hostupdate.notify_vim_action = False
hostupdate.notify_mtce = True
val = {'ihost_action': constants.UNLOCK_ACTION}
@@ -6121,6 +6173,14 @@ class HostController(rest.RestController):
def _handle_unlock_storage_host(self, hostupdate):
self._ceph.update_crushmap(hostupdate)
def _handle_unlock_worker_host(self, hostupdate):
# Update crushmap if we unlocked the worker with a ceph monitor.
monitor_list = pecan.request.dbapi.ceph_mon_get_list()
for mon in monitor_list:
ihost = pecan.request.dbapi.ihost_get(mon['forihostid'])
if ihost.id == hostupdate.ihost_orig['id']:
self._ceph.update_crushmap(hostupdate)
@staticmethod
def _handle_lock_action(hostupdate):
"""Handle host-lock action."""

View File

@@ -497,13 +497,11 @@ def _check_host(stor):
if ihost['administrative'] != constants.ADMIN_LOCKED:
raise wsme.exc.ClientSideError(_("Host must be locked"))
# semantic check: whether personality == storage or we have k8s AIO SX
is_k8s_aio = (utils.is_aio_system(pecan.request.dbapi) and
utils.is_kubernetes_config(pecan.request.dbapi))
if not is_k8s_aio and ihost['personality'] != constants.STORAGE:
msg = ("Host personality must be 'storage' or kubernetes enabled "
"1 or 2 node system")
raise wsme.exc.ClientSideError(_(msg))
# semantic check: only storage nodes are allowed without k8s
if (not utils.is_kubernetes_config(pecan.request.dbapi) and
ihost['personality'] != constants.STORAGE):
msg = ("Host personality must be 'storage' or kubernetes enabled.")
raise wsme.exc.ClientSideError(_(msg))
# semantic check: whether system has a ceph backend
if not StorageBackendConfig.has_backend_configured(
@@ -526,8 +524,24 @@ def _check_host(stor):
"Only %d storage monitor available. "
"At least %s unlocked and enabled hosts with monitors are "
"required. Please ensure hosts with monitors are unlocked "
"and enabled - candidates: controller-0, controller-1, "
"storage-0") % (num_monitors, required_monitors))
"and enabled.") % (num_monitors, required_monitors))
# semantic check: whether OSD can be added to this host.
stor_model = ceph.get_ceph_storage_model()
if stor_model == constants.CEPH_STORAGE_MODEL:
if ihost.personality != constants.STORAGE:
msg = ("Storage model is '%s'. Storage devices can only be added "
"to storage nodes." % stor_model)
raise wsme.exc.ClientSideError(_(msg))
elif stor_model == constants.CEPH_CONTROLLER_MODEL:
if ihost.personality != constants.CONTROLLER:
msg = ("Storage model is '%s'. Storage devices can only be added "
"to controller nodes." % stor_model)
raise wsme.exc.ClientSideError(_(msg))
elif stor_model == constants.CEPH_UNDEFINED_MODEL:
msg = ("Please install storage-0 or configure a Ceph monitor "
"on a worker node.")
raise wsme.exc.ClientSideError(_(msg))
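
The OSD placement rule enforced above can be summarized per deployment model: 'storage' puts OSDs only on storage nodes, 'controller' only on controllers, 'undefined' rejects OSDs until a model is chosen, and the AIO-SX model falls through to the single controller. A minimal sketch with a hypothetical helper name (osd_allowed_on):

# Sketch of the OSD placement rule per Ceph deployment model.
# osd_allowed_on is a hypothetical helper used only for illustration.

CEPH_STORAGE_MODEL = 'storage'
CEPH_CONTROLLER_MODEL = 'controller'
CEPH_UNDEFINED_MODEL = 'undefined'


def osd_allowed_on(stor_model, personality):
    if stor_model == CEPH_STORAGE_MODEL:
        return personality == 'storage'
    if stor_model == CEPH_CONTROLLER_MODEL:
        return personality == 'controller'
    if stor_model == CEPH_UNDEFINED_MODEL:
        # Install storage-0 or add a worker monitor to pick a model first.
        return False
    return True   # aio-sx: OSDs go on the single controller


assert osd_allowed_on('controller', 'controller')
assert not osd_allowed_on('storage', 'controller')
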
def _check_disk(stor):

View File

@@ -41,6 +41,7 @@ from sysinv.api.controllers.v1 import link
from sysinv.api.controllers.v1 import types
from sysinv.api.controllers.v1 import utils
from sysinv.api.controllers.v1.utils import SBApiHelper as api_helper
from sysinv.common import ceph
from sysinv.common import constants
from sysinv.common import exception
from sysinv.common import utils as cutils
@@ -974,6 +975,14 @@ def _check_replication_number(new_cap, orig_cap):
raise wsme.exc.ClientSideError(
_("Can not modify ceph replication factor on "
"two node configuration."))
if ceph.get_ceph_storage_model() == constants.CEPH_CONTROLLER_MODEL:
# Replication change is not allowed when storage OSDs
# are enabled on controllers.
raise wsme.exc.ClientSideError(
_("Can not modify replication factor on "
"'%s' ceph deployment model." % constants.CEPH_CONTROLLER_MODEL))
# On a standard install we allow modifications of ceph storage
# backend parameters after the manifests have been applied and
# before first storage node has been configured.
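
In short, a replication-factor change is now rejected both on two-node configurations and whenever OSDs live on controllers. A hedged sketch of the combined condition, using a hypothetical helper name; the real check also considers whether the first storage node has already been configured:

# Sketch of the replication-change admission rule above.
# replication_change_allowed is a hypothetical helper for illustration.

CEPH_CONTROLLER_MODEL = 'controller'


def replication_change_allowed(stor_model, is_two_node_system):
    if is_two_node_system:
        return False        # fixed replication on two-node configurations
    if stor_model == CEPH_CONTROLLER_MODEL:
        return False        # OSDs on controllers: replication is fixed
    return True


assert not replication_change_allowed('controller', False)
assert replication_change_allowed('storage', False)
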

View File

@@ -645,21 +645,18 @@ class CephApiOperator(object):
required_monitors = constants.MIN_STOR_MONITORS
quorum_names = []
inventory_monitor_names = []
ihosts = db_api.ihost_get_list()
for ihost in ihosts:
if ihost['personality'] == constants.WORKER:
continue
capabilities = ihost['capabilities']
if 'stor_function' in capabilities:
host_action = ihost['ihost_action'] or ""
locking = (host_action.startswith(constants.LOCK_ACTION) or
host_action.startswith(constants.FORCE_LOCK_ACTION))
if (capabilities['stor_function'] == constants.STOR_FUNCTION_MONITOR and
ihost['administrative'] == constants.ADMIN_UNLOCKED and
ihost['operational'] == constants.OPERATIONAL_ENABLED and
not locking):
num_inv_monitors += 1
inventory_monitor_names.append(ihost['hostname'])
monitor_list = db_api.ceph_mon_get_list()
for mon in monitor_list:
ihost = db_api.ihost_get(mon['forihostid'])
host_action = ihost['ihost_action'] or ""
locking = (host_action.startswith(constants.LOCK_ACTION) or
host_action.startswith(constants.FORCE_LOCK_ACTION))
if (ihost['administrative'] == constants.ADMIN_UNLOCKED and
ihost['operational'] == constants.OPERATIONAL_ENABLED and
not locking):
num_inv_monitors += 1
inventory_monitor_names.append(ihost['hostname'])
LOG.info("Active ceph monitors in inventory = %s" % str(inventory_monitor_names))
@@ -702,15 +699,16 @@ class CephApiOperator(object):
def fix_crushmap(dbapi=None):
# Crush Map: Replication of PGs across storage node pairs
""" Set Ceph's CRUSH Map based on storage model """
if not dbapi:
dbapi = pecan.request.dbapi
crushmap_flag_file = os.path.join(constants.SYSINV_CONFIG_PATH,
constants.CEPH_CRUSH_MAP_APPLIED)
if not os.path.isfile(crushmap_flag_file):
if utils.is_aio_simplex_system(dbapi):
stor_model = get_ceph_storage_model(dbapi)
if stor_model == constants.CEPH_AIO_SX_MODEL:
crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt"
elif utils.is_aio_duplex_system(dbapi):
elif stor_model == constants.CEPH_CONTROLLER_MODEL:
crushmap_txt = "/etc/sysinv/crushmap-controller-model.txt"
else:
crushmap_txt = "/etc/sysinv/crushmap-storage-model.txt"
@@ -736,3 +734,59 @@ def fix_crushmap(dbapi=None):
'Reason: {}').format(crushmap_flag_file, e))
return True
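
fix_crushmap now keys the crushmap file purely on the storage model. A minimal sketch of that mapping, with the file paths copied from this change and a hypothetical helper name (crushmap_for):

# Sketch of the crushmap selection above, keyed on the storage model.
# crushmap_for is a hypothetical helper used only for illustration.

CEPH_AIO_SX_MODEL = 'aio-sx'
CEPH_CONTROLLER_MODEL = 'controller'


def crushmap_for(stor_model):
    if stor_model == CEPH_AIO_SX_MODEL:
        return "/etc/sysinv/crushmap-aio-sx.txt"
    if stor_model == CEPH_CONTROLLER_MODEL:
        return "/etc/sysinv/crushmap-controller-model.txt"
    return "/etc/sysinv/crushmap-storage-model.txt"


assert crushmap_for('controller').endswith('controller-model.txt')
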
def get_ceph_storage_model(dbapi=None):
if not dbapi:
dbapi = pecan.request.dbapi
if utils.is_aio_simplex_system(dbapi):
return constants.CEPH_AIO_SX_MODEL
if utils.is_aio_duplex_system(dbapi):
return constants.CEPH_CONTROLLER_MODEL
is_storage_model = False
is_controller_model = False
monitor_list = dbapi.ceph_mon_get_list()
for mon in monitor_list:
ihost = dbapi.ihost_get(mon['forihostid'])
if ihost.personality == constants.WORKER:
# 3rd monitor is on a compute node, so OSDs are on controller
is_controller_model = True
elif ihost.personality == constants.STORAGE:
# 3rd monitor is on storage-0, so OSDs are also on storage nodes
is_storage_model = True
# There are cases where we delete the monitor on worker node and have not
# yet assigned it to another worker. In this case check if any OSDs have
# been configured on controller nodes.
if not is_storage_model:
controller_hosts = dbapi.ihost_get_by_personality(constants.CONTROLLER)
for chost in controller_hosts:
istors = dbapi.istor_get_by_ihost(chost['uuid'])
if len(istors):
LOG.info("Controller host %s has OSDs configured. System has ceph "
"controller storage." % chost['hostname'])
is_controller_model = True
break
if is_storage_model and is_controller_model:
# Both types should not be true at the same time, but we should log a
# message for debug purposes
# TODO(sdinescu): Improve error message
LOG.error("Wrong ceph storage type. Bad configuration.")
return constants.CEPH_STORAGE_MODEL
elif is_storage_model:
return constants.CEPH_STORAGE_MODEL
elif is_controller_model:
return constants.CEPH_CONTROLLER_MODEL
else:
# This case is for the install stage where the decision
# to configure OSDs on controller or storage nodes is not
# clear (before adding a monitor on a compute or before
# configuring the first storage node)
return constants.CEPH_UNDEFINED_MODEL
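
get_ceph_storage_model() becomes the single source of truth for the deployment model, and the checks above all branch on its result. A hedged usage sketch; describe_ceph_layout is a hypothetical helper, and it assumes it runs where a dbapi handle (or the pecan request context) is available:

# Hedged usage sketch of the new helper; describe_ceph_layout is not part
# of sysinv and exists only to show how callers branch on the model.

from sysinv.common import ceph
from sysinv.common import constants


def describe_ceph_layout(dbapi=None):
    # dbapi falls back to pecan.request.dbapi inside get_ceph_storage_model()
    stor_model = ceph.get_ceph_storage_model(dbapi)
    if stor_model == constants.CEPH_STORAGE_MODEL:
        return "OSDs on dedicated storage nodes"
    elif stor_model == constants.CEPH_CONTROLLER_MODEL:
        return "OSDs on controllers (standard or two-node)"
    elif stor_model == constants.CEPH_AIO_SX_MODEL:
        return "single-node deployment"
    return "deployment model not decided yet"
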

View File

@@ -434,6 +434,15 @@ SB_CEPH_MON_GIB_MAX = 40
SB_CONFIGURATION_TIMEOUT = 1200
# Ceph storage deployment model
# Controller model: OSDs are on controllers, no storage nodes can
# be defined.
# Storage model: OSDs are on dedicated storage nodes.
CEPH_STORAGE_MODEL = 'storage'
CEPH_CONTROLLER_MODEL = 'controller'
CEPH_AIO_SX_MODEL = 'aio-sx'
CEPH_UNDEFINED_MODEL = 'undefined'
# Storage: Minimum number of monitors
MIN_STOR_MONITORS = 2
@@ -779,6 +788,7 @@ CEPH_TARGET_PGS_PER_OSD = 200
# Dual node and Storage
CEPH_REPLICATION_FACTOR_DEFAULT = 2
CEPH_REPLICATION_FACTOR_SUPPORTED = [2, 3]
CEPH_CONTROLLER_MODEL_REPLICATION_SUPPORTED = [2]
# Single node
AIO_SX_CEPH_REPLICATION_FACTOR_DEFAULT = 1

View File

@@ -142,7 +142,7 @@ class StorageBackendConfig(object):
@staticmethod
def has_backend_configured(dbapi, target, service=None,
check_only_defaults=True, rpcapi=None):
""" Check is a backend is configured. """
""" Check if a backend is configured. """
# If cinder is a shared service on another region and
# we want to know if the ceph backend is configured,
# send a rpc to conductor which sends a query to the primary

View File

@@ -5664,7 +5664,6 @@ class ConductorManager(service.PeriodicService):
# Update service table
self.update_service_table_for_cinder()
# TODO(oponcea): Uncomment when SM supports in-service config reload
ctrls = self.dbapi.ihost_get_by_personality(constants.CONTROLLER)
valid_ctrls = [ctrl for ctrl in ctrls if
ctrl.administrative == constants.ADMIN_UNLOCKED and
@@ -5741,7 +5740,6 @@ class ConductorManager(service.PeriodicService):
(node.administrative == constants.ADMIN_UNLOCKED and
node.operational == constants.OPERATIONAL_ENABLED)]
# TODO: check what other puppet class need to be called
config_dict = {
"personalities": personalities,
"host_uuids": [node.uuid for node in valid_nodes],

View File

@@ -530,6 +530,7 @@ class StorageTierDependentTCs(base.FunctionalTest):
self.system = dbutils.create_test_isystem()
self.load = dbutils.create_test_load()
self.host_index = -1
self.mon_index = -1
def tearDown(self):
super(StorageTierDependentTCs, self).tearDown()
@@ -554,6 +555,17 @@ class StorageTierDependentTCs(base.FunctionalTest):
invprovision='unprovisioned')
return self.dbapi.ihost_create(ihost_dict)
def _create_storage_mon(self, hostname, ihost_id):
self.mon_index += 1
ceph_mon_dict = dbutils.get_test_mon(
id=self.mon_index,
uuid=uuidutils.generate_uuid(),
state=constants.SB_STATE_CONFIGURED,
task=constants.SB_TASK_NONE,
forihostid=ihost_id,
hostname=hostname)
return self.dbapi.ceph_mon_create(ceph_mon_dict)
#
# StorageTier with stors
#
@@ -567,6 +579,8 @@ class StorageTierDependentTCs(base.FunctionalTest):
device_path='/dev/disk/by-path/pci-0000:00:0d.0-ata-2.0',
forihostid=storage_0.id)
self._create_storage_mon('storage-0', storage_0['id'])
# Mock the fsid call so that we don't have to wait for the timeout
with mock.patch.object(ceph.CephWrapper, 'fsid') as mock_fsid:
mock_fsid.return_value = (mock.MagicMock(ok=False), None)

View File

@@ -439,6 +439,23 @@ def get_test_stor(**kw):
return stor
def get_test_mon(**kw):
mon = {
'id': kw.get('id', 2),
'uuid': kw.get('uuid'),
'device_path': kw.get('device_path', ''),
'ceph_mon_gib': kw.get('ceph_mon_gib', 20),
'state': kw.get('state', 'configured'),
'task': kw.get('task', None),
'forihostid': kw.get('forihostid', 0),
'ihost_uuid': kw.get('ihost_uuid', '1be26c0b-03f2-4d2e-ae87-c02d7f33c781'),
'hostname': kw.get('hostname', 'controller-0'),
}
return mon
def get_test_lvg(**kw):
lvg = {
'id': kw.get('id', 2),