Make Ceph the default storage backend

Since Ceph is mandatory for all deployments, it is now configured by
default. Users no longer need to run CLI commands to enable it.

This commit makes Ceph the default backend in both config_controller
and the Ansible playbook. In Ansible, we decouple the Ceph DB
configuration from manifest application: the playbook sets up the
needed data structures in the sysinv DB, but Ceph itself is enabled
only when the controller manifests are applied on the first
controller-0 unlock.

This commit also adds:
o ceph crushmap loading through puppet, since Ceph is no longer
  running before unlock to load it, and the crushmap is needed when
  OSDs are configured on unlock.
o the ability to remove OSD stors while a node is locked and the
  corresponding stor state is configuring-on-unlock.
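
For reference, the manual post-install step this removes was adding the
backend by hand (e.g. "system storage-backend-add ceph --confirmed").
A minimal sketch of the sysinv client calls the bootstrap now performs,
mirroring the hunks below; it assumes `client` is an authenticated
cgtsclient handle and `controller` is the controller-0 ihost object:

    # Sketch only: what bootstrap now populates automatically.
    client.sysinv.ceph_mon.create(ihost_uuid=controller.uuid)  # controller-0 monitor
    client.sysinv.storage_ceph.create(confirmed=True)          # default ceph backend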

Change-Id: I937bfb4f9397e726966067f54331d649f53f4fe2
Story: 2004695
Task: 30053
Depends-On: https://review.openstack.org/#/c/643914/
Signed-off-by: Ovidiu Poncea <ovidiu.poncea@windriver.com>
Ovidiu Poncea 2019-03-22 17:38:31 +02:00
parent e7fae38bed
commit fcf3fbf5f4
20 changed files with 241 additions and 249 deletions


@@ -5077,6 +5077,15 @@ class ConfigAssistant():
}
client.sysinv.route.create(**values)
def _populate_default_storage_backend(self, client, controller):
# Create the Ceph monitor for controller-0
values = {'ihost_uuid': controller.uuid}
client.sysinv.ceph_mon.create(**values)
# Create the Ceph default backend
values = {'confirmed': True}
client.sysinv.storage_ceph.create(**values)
def _populate_infrastructure_interface(self, client, controller):
"""Configure the infrastructure interface(s)"""
if not self.infrastructure_interface:
@@ -5349,6 +5358,7 @@ class ConfigAssistant():
# ceph_mon config requires controller host to be created
self._inventory_config_complete_wait(client, controller)
self._populate_interface_config(client, controller)
self._populate_default_storage_backend(client, controller)
except (KeystoneFail, SysInvFail) as e:
LOG.exception(e)


@@ -19,8 +19,8 @@ import time
# constants in controllerconfig. When it is time to remove/deprecate these
# packages, classes OpenStack, Token and referenced constants need to be moved
# to this standalone script.
from controllerconfig import ConfigFail
from controllerconfig.common import constants
from controllerconfig import ConfigFail
from controllerconfig import openstack
from controllerconfig import sysinv_api as sysinv
@@ -711,6 +711,19 @@ def inventory_config_complete_wait(client, controller):
wait_pv_config(client, controller)
def populate_default_storage_backend(client, controller):
if not INITIAL_POPULATION:
return
print("Populating ceph-mon config for controller-0...")
values = {'ihost_uuid': controller.uuid}
client.sysinv.ceph_mon.create(**values)
print("Populating ceph storage backend config...")
values = {'confirmed': True}
client.sysinv.storage_ceph.create(**values)
def handle_invalid_input():
raise Exception("Invalid input!\nUsage: <bootstrap-config-file> "
"[--system] [--network] [--service]")
@@ -757,6 +770,7 @@ if __name__ == '__main__':
populate_docker_config(client)
controller = populate_controller_config(client)
inventory_config_complete_wait(client, controller)
populate_default_storage_backend(client, controller)
os.remove(config_file)
if INITIAL_POPULATION:
print("Successfully updated the initial system config.")


@@ -1,8 +0,0 @@
# Returns true if cinder ceph needs to be configured
Facter.add("is_initial_cinder_ceph_config") do
setcode do
conf_path = Facter::Core::Execution.exec("hiera --config /etc/puppet/hiera.yaml platform::params::config_path")
! File.exist?(conf_path +'.initial_cinder_ceph_config_complete')
end
end


@@ -1,8 +0,0 @@
# Returns true if this is the initial cinder config for this system
Facter.add("is_initial_cinder_config") do
setcode do
conf_path = Facter::Core::Execution.exec("hiera --config /etc/puppet/hiera.yaml platform::params::config_path")
! File.exist?(conf_path + '.initial_cinder_config_complete')
end
end


@@ -1,8 +0,0 @@
# Returns true if cinder lvm needs to be configured
Facter.add("is_initial_cinder_lvm_config") do
setcode do
conf_path = Facter::Core::Execution.exec("hiera --config /etc/puppet/hiera.yaml platform::params::config_path")
! File.exist?(conf_path + '.initial_cinder_lvm_config_complete')
end
end


@@ -1,7 +0,0 @@
# Returns true if cinder LVM needs to be configured on current node
Facter.add("is_node_cinder_lvm_config") do
setcode do
! File.exist?('/etc/platform/.node_cinder_lvm_config_complete')
end
end


@@ -153,11 +153,11 @@ class platform::ceph::post
}
if $service_enabled {
# Ceph configuration on this node is done
file { $node_ceph_configured_flag:
ensure => present
}
}
}
@@ -169,8 +169,8 @@ class platform::ceph::monitor
if $service_enabled {
if $system_type == 'All-in-one' and 'duplex' in $system_mode {
if str2bool($::is_controller_active) {
# Ceph mon is configured on a DRBD partition, on the active controller,
if str2bool($::is_standalone_controller) {
# Ceph mon is configured on a DRBD partition,
# when 'ceph' storage backend is added in sysinv.
# Then SM takes care of starting ceph after manifests are applied.
$configure_ceph_mon = true
@@ -236,6 +236,31 @@ class platform::ceph::monitor
# ensure configuration is complete before creating monitors
Class['::ceph'] -> Ceph::Mon <| |>
# ensure we load the crushmap at first unlock
if $system_type == 'All-in-one' and str2bool($::is_standalone_controller) {
if 'duplex' in $system_mode {
$crushmap_txt = '/etc/sysinv/crushmap-controller-model.txt'
} else {
$crushmap_txt = '/etc/sysinv/crushmap-aio-sx.txt'
}
$crushmap_bin = '/etc/sysinv/crushmap.bin'
Ceph::Mon <| |>
-> exec { 'Compile crushmap':
command => "crushtool -c ${crushmap_txt} -o ${crushmap_bin}",
onlyif => "test ! -f ${crushmap_bin}",
logoutput => true,
}
-> exec { 'Set crushmap':
command => "ceph osd setcrushmap -i ${crushmap_bin}",
unless => 'ceph osd crush rule list --format plain | grep -e "storage_tier_ruleset"',
logoutput => true,
}
-> Platform_ceph_osd <| |>
}
# Ensure networking is up before Monitors are configured
Anchor['platform::networking'] -> Ceph::Mon <| |>
# default configuration for all ceph monitor resources
Ceph::Mon {
fsid => $cluster_uuid,
@@ -248,33 +273,10 @@ class platform::ceph::monitor
public_addr => $floating_mon_ip,
}
if (str2bool($::is_controller_active) and
str2bool($::is_initial_cinder_ceph_config) and
!str2bool($::is_standalone_controller)) {
# On AIO-DX there is a single, floating, Ceph monitor backed by DRBD.
# Therefore DRBD must be up before Ceph monitor is configured
Drbd::Resource <| |> -> Ceph::Mon <| |>
# When we configure ceph after both controllers are active,
# we need to stop the monitor, unmount the monitor partition
# and set the drbd role to secondary, so that the handoff to
# SM is done properly once we swact to the standby controller.
# TODO: Remove this once SM supports in-service config reload.
Ceph::Mon <| |>
-> exec { 'Stop Ceph monitor':
command =>'/etc/init.d/ceph stop mon',
onlyif => '/etc/init.d/ceph status mon',
logoutput => true,
}
-> exec { 'umount ceph-mon partition':
command => "umount ${mon_mountpoint}",
onlyif => "mount | grep -q ${mon_mountpoint}",
logoutput => true,
}
-> exec { 'Set cephmon secondary':
command => 'drbdadm secondary drbd-cephmon',
unless => "drbdadm role drbd-cephmon | egrep '^Secondary'",
logoutput => true,
}
}
} else {
if $::hostname == $mon_0_host {
ceph::mon { $mon_0_host:
@@ -295,8 +297,7 @@ class platform::ceph::monitor
}
}
define platform_ceph_osd(
define osd_crush_location(
$osd_id,
$osd_uuid,
$disk_path,
@@ -311,12 +312,28 @@ define platform_ceph_osd(
"osd.${$osd_id}/crush_location": value => "root=${tier_name}-tier host=${$::platform::params::hostname}-${$tier_name}";
}
}
file { "/var/lib/ceph/osd/ceph-${osd_id}":
}
define platform_ceph_osd(
$osd_id,
$osd_uuid,
$disk_path,
$data_path,
$journal_path,
$tier_name,
) {
Anchor['platform::networking'] # Make sure networking is up before running ceph commands
-> file { "/var/lib/ceph/osd/ceph-${osd_id}":
ensure => 'directory',
owner => 'root',
group => 'root',
mode => '0755',
}
-> exec { "ceph osd create ${osd_uuid} ${osd_id}":
logoutput => true,
command => template('platform/ceph.osd.create.erb'),
}
-> ceph::osd { $disk_path:
uuid => $osd_uuid,
}
@@ -351,8 +368,13 @@ class platform::ceph::osds(
mode => '0755',
}
# Ensure ceph.conf is complete before configuring OSDs
Class['::ceph'] -> Platform_ceph_osd <| |>
# Journal disks need to be prepared before the OSDs are configured
Platform_ceph_journal <| |> -> Platform_ceph_osd <| |>
# Crush locations in ceph.conf need to be set before the OSDs are configured
Osd_crush_location <| |> -> Platform_ceph_osd <| |>
# default configuration for all ceph object resources
Ceph::Osd {
@@ -360,6 +382,7 @@ class platform::ceph::osds(
cluster_uuid => $cluster_uuid,
}
create_resources('osd_crush_location', $osd_config)
create_resources('platform_ceph_osd', $osd_config)
create_resources('platform_ceph_journal', $journal_config)
}
@@ -479,6 +502,7 @@ class platform::ceph::runtime_base {
class platform::ceph::runtime_osds {
include ::ceph::params
include ::platform::ceph
include ::platform::ceph::osds
# Since this is runtime we have to avoid checking status of Ceph while we
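
For readers less familiar with Puppet ordering, the crushmap handling
added above boils down to the following idempotent sequence. This is a
sketch only; the paths and guard conditions are taken from the hunk
(AIO-DX uses crushmap-controller-model.txt instead), and the function
name is illustrative:

    import os
    import subprocess

    CRUSHMAP_TXT = '/etc/sysinv/crushmap-aio-sx.txt'
    CRUSHMAP_BIN = '/etc/sysinv/crushmap.bin'

    def load_initial_crushmap():
        # 'Compile crushmap': skip if the binary map already exists
        if not os.path.isfile(CRUSHMAP_BIN):
            subprocess.check_call(
                ['crushtool', '-c', CRUSHMAP_TXT, '-o', CRUSHMAP_BIN])
        # 'Set crushmap': unless the storage tier ruleset is already loaded
        rules = subprocess.check_output(
            ['ceph', 'osd', 'crush', 'rule', 'list', '--format', 'plain'])
        if b'storage_tier_ruleset' not in rules:
            subprocess.check_call(
                ['ceph', 'osd', 'setcrushmap', '-i', CRUSHMAP_BIN])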


@@ -445,16 +445,13 @@ class platform::drbd::cephmon ()
$system_mode = $::platform::params::system_mode
$system_type = $::platform::params::system_type
#TODO: This will change once we remove the native cinder service
if (str2bool($::is_initial_config_primary) or
(str2bool($::is_controller_active) and str2bool($::is_initial_cinder_ceph_config))
){
if str2bool($::is_standalone_controller) and ! str2bool($::is_node_ceph_configured) {
# Active controller, first time configuration.
$drbd_primary = true
$drbd_initial = true
$drbd_automount = true
} elsif str2bool($::is_standalone_controller){
} elsif str2bool($::is_standalone_controller) {
# Active standalone controller, successive reboots.
$drbd_primary = true
$drbd_initial = undef
@@ -490,9 +487,9 @@ class platform::drbd(
$service_enable = false,
$service_ensure = 'stopped',
) {
if (str2bool($::is_initial_config_primary)
if (str2bool($::is_initial_config_primary) or str2bool($::is_standalone_controller)
){
# Enable DRBD at config_controller
# Enable DRBD on standalone
class { '::drbd':
service_enable => true,
service_ensure => 'running',
@@ -553,41 +550,55 @@ class platform::drbd::runtime {
}
}
class platform::drbd::runtime_service_enable {
class { '::drbd':
service_enable => true,
service_ensure => 'running'
}
}
class platform::drbd::pgsql::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::pgsql
}
class platform::drbd::cgcs::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::cgcs
}
class platform::drbd::extension::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::extension
}
class platform::drbd::patch_vault::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::patch_vault
}
class platform::drbd::etcd::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::etcd
}
class platform::drbd::dockerdistribution::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::dockerdistribution
}
class platform::drbd::cephmon::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::cephmon
}


@@ -0,0 +1,52 @@
/bin/true # puppet requires this for correct template parsing
# This is needed to pin a specific OSD id to a corresponding UUID.
# The problem is that ceph-disk prepare doesn't accept a Ceph OSD id as a
# CLI parameter. Therefore, the OSD with the desired OSD ID and UUID must
# exist before the puppet ceph module executes ceph-disk prepare.
set -x
osd_id=<%= @osd_id %>
osd_uuid=<%= @osd_uuid %>
# Ignore if Ceph is down; this case should only happen during a DOR (dead office recovery)
timeout 10 ceph -s
if [ $? -ne 0 ]; then
exit 0
fi
# Check if OSD exists and has the correct UUID
osds=( $(ceph osd ls) )
if [[ " ${osds[@]} " =~ " ${osd_id} " ]]; then
# Get the UUID. This is slower than 'ceph osd ls' as it also lists PGs with
# problems, but it is the only way to get the uuid of an OSD.
found_uuid=$(ceph osd dump | grep "^osd.${osd_id} " | awk '{print $NF}')
if [ "${found_uuid}" != "${osd_uuid}" ]; then
# During backup & restore (B&R), Ceph's crushmap is restored and, although
# OSDs are properly allocated to their hosts in the tree, the crushmap
# does not store OSD UUIDs. Therefore, without an osd_id and uuid match,
# when the OSD is prepared there is a chance that ceph-disk will create a
# new OSD that no longer matches the osd id in the sysinv db. So we have
# to remove OSDs whose UUIDs don't match and recreate them with the
# expected OSD ID and UUID so that ceph-disk does not get confused.
ceph osd rm ${osd_id}
RET=$?
if [ $RET -ne 0 ]; then
echo "Error removing osd ${osd_id}, exit code: ${RET}"
exit $RET
fi
else
# OSD exists and has the correct uuid
exit 0
fi
fi
# Create the OSD with desired id and uuid
ceph osd create ${osd_uuid} ${osd_id}
RET=$?
if [ $RET -ne 0 ]; then
echo "Error creating osd ${osd_id}, exit code: ${RET}"
exit $RET
fi
set +x
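
The same reconciliation, sketched in Python for readers less familiar
with shell. The template above is the authoritative implementation;
`ensure_osd` is an illustrative name:

    import subprocess

    def ensure_osd(osd_id, osd_uuid):
        # Ignore if Ceph is down (DOR case): nothing to reconcile yet
        if subprocess.call(['timeout', '10', 'ceph', '-s']) != 0:
            return
        osds = subprocess.check_output(['ceph', 'osd', 'ls']).decode().split()
        if str(osd_id) in osds:
            # The OSD's uuid is the last column of its 'ceph osd dump' line
            dump = subprocess.check_output(['ceph', 'osd', 'dump']).decode()
            for line in dump.splitlines():
                if line.startswith('osd.%s ' % osd_id):
                    if line.split()[-1] == osd_uuid:
                        return  # id/uuid pair already correct
                    # uuid mismatch (e.g. after restore): drop and recreate
                    subprocess.check_call(['ceph', 'osd', 'rm', str(osd_id)])
                    break
        subprocess.check_call(['ceph', 'osd', 'create', osd_uuid, str(osd_id)])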


@@ -11,8 +11,6 @@ from cgtsclient.common import base
from cgtsclient.common import constants
from cgtsclient.common import utils
from cgtsclient import exc
from cgtsclient.v1 import ceph_mon as ceph_mon_utils
from cgtsclient.v1 import ihost as ihost_utils
from cgtsclient.v1 import storage_ceph # noqa
from cgtsclient.v1 import storage_ceph_external # noqa
from cgtsclient.v1 import storage_external # noqa
@@ -138,23 +136,6 @@ def _display_next_steps():
def backend_add(cc, backend, args):
backend = backend.replace('-', '_')
# add ceph mons to controllers
if backend == constants.SB_TYPE_CEPH:
# Controllers should always have monitors.
# Not finding a controller means it's not yet configured,
# so move forward.
try:
ihost = ihost_utils._find_ihost(cc, constants.CONTROLLER_0_HOSTNAME)
ceph_mon_utils.ceph_mon_add(cc, args, ihost.uuid)
except exc.CommandError:
pass
try:
ihost = ihost_utils._find_ihost(cc, constants.CONTROLLER_1_HOSTNAME)
ceph_mon_utils.ceph_mon_add(cc, args, ihost.uuid)
except exc.CommandError:
pass
# allowed storage_backend fields
allowed_fields = ['name', 'services', 'confirmed', 'ceph_conf']


@@ -512,6 +512,10 @@ class AgentManager(service.PeriodicService):
msg_dict.update({constants.HOST_ACTION_STATE:
constants.HAS_REINSTALLED})
# Is this the first time since boot we are reporting to conductor?
msg_dict.update({constants.SYSINV_AGENT_FIRST_REPORT:
not os.path.exists(SYSINV_FIRST_REPORT_FLAG)})
try:
rpcapi.iplatform_update_by_ihost(context,
host_uuid,
@@ -916,10 +920,6 @@ class AgentManager(service.PeriodicService):
if iscsi_initiator_name is not None:
imsg_dict.update({'iscsi_initiator_name': iscsi_initiator_name})
# Is this the first time since boot we are reporting to conductor?
imsg_dict.update({constants.SYSINV_AGENT_FIRST_REPORT:
not os.path.exists(SYSINV_FIRST_REPORT_FLAG)})
self.platform_update_by_host(rpcapi,
icontext,
self._ihost_uuid,


@@ -446,9 +446,10 @@ def _create(ceph_mon):
"replication is set to: %s'. Please update replication "
"before configuring a monitor on a worker node." % supported_replication))
# host must be locked and online
if (chost['availability'] != constants.AVAILABILITY_ONLINE or
chost['administrative'] != constants.ADMIN_LOCKED):
# host must be locked and online unless this is controller-0
if (chost['hostname'] != constants.CONTROLLER_0_HOSTNAME and
(chost['availability'] != constants.AVAILABILITY_ONLINE or
chost['administrative'] != constants.ADMIN_LOCKED)):
raise wsme.exc.ClientSideError(
_("Host %s must be locked and online." % chost['hostname']))


@@ -5295,7 +5295,7 @@ class HostController(rest.RestController):
pass
elif StorageBackendConfig.has_backend_configured(
pecan.request.dbapi,
constants.CINDER_BACKEND_CEPH):
constants.SB_TYPE_CEPH):
if utils.is_aio_simplex_system(pecan.request.dbapi):
# Check if host has enough OSDs configured for each tier
tiers = pecan.request.dbapi.storage_tier_get_all()


@@ -471,23 +483,33 @@ class StorageController(rest.RestController):
except Exception as e:
LOG.exception(e)
raise
# Make sure that we are allowed to delete
_check_host(stor)
# Delete the stor if supported
ihost_id = stor['forihostid']
ihost = pecan.request.dbapi.ihost_get(ihost_id)
if stor.function == constants.STOR_FUNCTION_JOURNAL:
# Host must be locked
ihost_id = stor['forihostid']
ihost = pecan.request.dbapi.ihost_get(ihost_id)
if ihost['administrative'] != constants.ADMIN_LOCKED:
raise wsme.exc.ClientSideError(_("Host %s must be locked." %
ihost['hostname']))
self.delete_stor(stor_uuid)
elif (stor.function == constants.STOR_FUNCTION_OSD and
stor.state == constants.SB_STATE_CONFIGURING_ON_UNLOCK):
# Host must be locked
if ihost['administrative'] != constants.ADMIN_LOCKED:
raise wsme.exc.ClientSideError(_("Host %s must be locked." %
ihost['hostname']))
self.delete_stor(stor_uuid)
else:
raise wsme.exc.ClientSideError(_(
"Deleting a Storage Function other than %s is not "
"supported on this setup") % constants.STOR_FUNCTION_JOURNAL)
"Deleting a Storage Function other than '%s' and '%s' in "
"state '%s' is not supported on this setup.") %
(constants.STOR_FUNCTION_JOURNAL,
constants.STOR_FUNCTION_OSD,
constants.SB_STATE_CONFIGURING_ON_UNLOCK))
def delete_stor(self, stor_uuid):
"""Delete a stor"""
@@ -497,10 +507,10 @@ class StorageController(rest.RestController):
try:
# The conductor will handle removing the stor, not all functions
# need special handling
if stor.function == constants.STOR_FUNCTION_OSD:
pecan.request.rpcapi.unconfigure_osd_istor(pecan.request.context,
stor)
elif stor.function == constants.STOR_FUNCTION_JOURNAL:
# if stor.function == constants.STOR_FUNCTION_OSD:
# pecan.request.rpcapi.unconfigure_osd_istor(pecan.request.context,
# stor)
if stor.function == constants.STOR_FUNCTION_JOURNAL:
pecan.request.dbapi.istor_disable_journal(stor_uuid)
# Now remove the stor from DB
pecan.request.dbapi.istor_remove_disk_association(stor_uuid)
@@ -901,29 +911,21 @@ def _create(stor, iprofile=None):
"Invalid stor device type: only SSD and NVME devices are supported"
" for journal functions."))
if osd_create is True:
# Get the next free OSD ID in the system
stors = pecan.request.dbapi.istor_get_list(sort_key='osdid', sort_dir='asc')
stors_ids = [s['osdid'] for s in stors if s['osdid'] is not None]
if stors_ids:
candidate_ids = [i for i in range(0, stors_ids[-1] + 2) if i not in stors_ids]
create_attrs['osdid'] = candidate_ids[0]
else:
create_attrs['osdid'] = 0
else:
create_attrs['osdid'] = None
new_stor = pecan.request.dbapi.istor_create(forihostid,
create_attrs)
# Create an osd associated with disk.
if osd_create is True:
try:
new_stor = pecan.request.rpcapi.configure_osd_istor(
pecan.request.context, new_stor)
except Exception as cpe:
LOG.exception(cpe)
# Delete the partially configured istor
pecan.request.dbapi.istor_destroy(new_stor.uuid)
raise wsme.exc.ClientSideError(_(
"Internal error: failed to create a storage object. "
"Make sure storage cluster is up and healthy."))
if iprofile:
new_stor = pecan.request.dbapi.istor_update(new_stor.uuid,
{'osdid': None})
else:
# Update the database record
new_stor.save(pecan.request.context)
# Associate the disk to db record
values = {'foristorid': new_stor.id}
pecan.request.dbapi.idisk_update(idisk_uuid,
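
The osdid selection above reuses the first gap in the existing id
sequence, falling back to max + 1 when there is no gap; a quick worked
example of the list comprehension:

    # osdids already in use, sorted ascending, None entries filtered out
    stors_ids = [0, 1, 3]
    candidate_ids = [i for i in range(0, stors_ids[-1] + 2)
                     if i not in stors_ids]
    print(candidate_ids[0])   # 2 -> the gap is reused
    # with no gaps, e.g. [0, 1, 2], candidate_ids == [3] -> max + 1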


@@ -339,7 +339,6 @@ class StorageCephController(rest.RestController):
@wsme_pecan.wsexpose(None, types.uuid, status_code=204)
def delete(self, storageceph_uuid):
"""Delete a backend."""
return _delete(storageceph_uuid)
@@ -696,14 +695,8 @@ def _apply_backend_changes(op, sb_obj):
services = api_helper.getListFromServices(sb_obj.as_dict())
if op == constants.SB_API_OP_CREATE:
if sb_obj.name == constants.SB_DEFAULT_NAMES[
if sb_obj.name != constants.SB_DEFAULT_NAMES[
constants.SB_TYPE_CEPH]:
# Apply manifests for primary tier
pecan.request.rpcapi.update_ceph_config(pecan.request.context,
sb_obj.uuid,
services)
else:
# Enable the service(s) use of the backend
if constants.SB_SVC_CINDER in services:
pecan.request.rpcapi.update_ceph_services(
@@ -800,10 +793,21 @@ def _set_defaults(storage_ceph):
'kube_pool_gib': None,
'object_gateway': False,
}
sc = api_helper.set_backend_data(storage_ceph,
defaults,
CAPABILITIES,
constants.SB_CEPH_SVCS_SUPPORTED)
# Ceph is our default storage backend and is added at configuration time;
# set state and task accordingly.
if sc['name'] == constants.SB_DEFAULT_NAMES[constants.SB_TYPE_CEPH]:
sc['state'] = constants.SB_STATE_CONFIGURED
if utils.is_aio_simplex_system(pecan.request.dbapi):
sc['task'] = None
else:
sc['task'] = constants.SB_TASK_RECONFIG_CONTROLLER
return sc


@@ -24,6 +24,8 @@ import pecan
import os
import requests
from sysinv.api.controllers.v1.utils import is_aio_system
LOG = logging.getLogger(__name__)
@@ -704,11 +706,36 @@ class CephApiOperator(object):
def fix_crushmap(dbapi=None):
""" Set Ceph's CRUSH Map based on storage model """
def _create_crushmap_flag_file():
try:
open(crushmap_flag_file, "w").close()
except IOError as e:
LOG.warn(_('Failed to create flag file: {}. '
'Reason: {}').format(crushmap_flag_file, e))
if not dbapi:
dbapi = pecan.request.dbapi
crushmap_flag_file = os.path.join(constants.SYSINV_CONFIG_PATH,
constants.CEPH_CRUSH_MAP_APPLIED)
if not os.path.isfile(crushmap_flag_file):
_operator = CephApiOperator()
if not is_aio_system(dbapi):
# At least two monitors have to be running on a standard deployment,
# otherwise don't even try to load the crushmap.
active_mons, required_mons, __ = _operator.get_monitors_status(dbapi)
if required_mons > active_mons:
LOG.info("Not enough monitors yet available to fix crushmap.")
return False
# The crushmap may already be loaded through puppet; avoid doing it twice.
default_ceph_tier_name = constants.SB_TIER_DEFAULT_NAMES[
constants.SB_TIER_TYPE_CEPH] + constants.CEPH_CRUSH_TIER_SUFFIX
rule_is_present, __, __ = _operator._crush_rule_status(default_ceph_tier_name)
if rule_is_present:
_create_crushmap_flag_file()
return False
stor_model = get_ceph_storage_model(dbapi)
if stor_model == constants.CEPH_AIO_SX_MODEL:
crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt"
@@ -731,13 +758,10 @@ def fix_crushmap(dbapi=None):
# May not be critical, depends on where this is called.
reason = "Error: %s Output: %s" % (str(e), e.output)
raise exception.CephCrushMapNotApplied(reason=reason)
try:
open(crushmap_flag_file, "w").close()
except IOError as e:
LOG.warn(_('Failed to create flag file: {}. '
'Reason: {}').format(crushmap_flag_file, e))
_create_crushmap_flag_file()
return True
return False
def get_ceph_storage_model(dbapi=None):
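
fix_crushmap() is now also triggered from the conductor on a node's
first agent report after boot (see the conductor hunk further down);
the flag file keeps it a one-shot operation across callers and reboots.
A minimal runnable sketch of that pattern, with an illustrative path
and function names (the real path is built from SYSINV_CONFIG_PATH and
CEPH_CRUSH_MAP_APPLIED):

    import os

    CRUSHMAP_FLAG = '/tmp/.crushmap_applied'  # illustrative path only

    def fix_crushmap_once(apply_fn):
        if os.path.isfile(CRUSHMAP_FLAG):
            return False                  # already applied earlier
        apply_fn()                        # compile + set the map, as above
        open(CRUSHMAP_FLAG, 'w').close()  # later calls short-circuit
        return True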


@@ -220,6 +220,7 @@ class StorageBackendConfig(object):
}
ceph_mons = dbapi.ceph_mon_get_list()
ceph_mon = None
for ceph_mon in ceph_mons:
if ceph_mon['hostname'] == constants.CONTROLLER_0_HOSTNAME:
targets.update({'%s-%s' % (constants.CONTROLLER_0_HOSTNAME,
@@ -231,8 +232,6 @@
targets.update({'%s-%s' % (ceph_mon['hostname'],
network_type): 'ceph-mon-2-ip'})
ceph_mon['ceph_mon_gib'] = ceph_mons[0]['ceph_mon_gib']
results = {}
addrs = dbapi.addresses_get_all()
for addr in addrs:


@@ -124,7 +124,6 @@ class CephOperator(object):
# cluster UUID value that is valid and consistent for the state of the
# installation. Also make sure that we have a cluster DB entry
# established
LOG.debug("_init_db_cluster_and_tier: Reteiving cluster record")
try:
self._db_cluster = self._db_api.clusters_get_all(
type=constants.CINDER_BACKEND_CEPH)[0]
@@ -132,7 +131,7 @@
# Retrieve ceph cluster fsid and update database
fsid = self._get_fsid()
if uuidutils.is_uuid_like(fsid):
LOG.debug("Update cluster record: fsid=%s." % fsid)
LOG.info("Update cluster record: fsid=%s." % fsid)
self._db_cluster.cluster_uuid = fsid
self._db_api.cluster_update(
self.cluster_db_uuid,
@@ -155,7 +154,7 @@
# Try to use ceph cluster fsid
fsid = self._get_fsid()
LOG.info("Create new cluster record: fsid=%s." % fsid)
LOG.info("Create new ceph cluster record: fsid=%s." % fsid)
# Create the default primary cluster
self._db_cluster = self._db_api.cluster_create(
{'uuid': fsid if uuidutils.is_uuid_like(fsid) else str(uuid.uuid4()),
@@ -165,6 +164,7 @@
'system_id': isystem.id})
# Create the default primary ceph storage tier
LOG.info("Create primary ceph tier record.")
self._db_primary_tier = self._db_api.storage_tier_create(
{'forclusterid': self.cluster_id,
'name': constants.SB_TIER_DEFAULT_NAMES[constants.SB_TIER_TYPE_CEPH],
@@ -831,67 +831,6 @@ class CephOperator(object):
name=rule_name, reason=body['status'])
raise e
# TODO(CephPoolsDecouple): remove
def configure_osd_pools(self, ceph_backend=None, new_pool_size=None, new_pool_min_size=None):
"""Create or resize all of the osd pools as needed
ceph backend could be 2nd backend which is in configuring state
"""
# Handle pools for multiple tiers
tiers = self._db_api.storage_tier_get_by_cluster(self.cluster_db_uuid)
ceph_tiers = [t for t in tiers if t.type == constants.SB_TIER_TYPE_CEPH]
ceph_backends = self._db_api.storage_ceph_get_list()
for t in ceph_tiers:
# Get corresponding ceph backend for the tier, if any
bk = None
for bk in ceph_backends:
if t.forbackendid == bk.id:
break
# Get pool replication parameters
pool_size, pool_min_size = StorageBackendConfig.get_ceph_pool_replication(self._db_api, bk)
if bk and ceph_backend and bk.name == ceph_backend.name:
# Override replication
pool_size = new_pool_size if new_pool_size else pool_size
pool_min_size = new_pool_min_size if new_pool_min_size else pool_min_size
# Configure tier OSD pools
if t.uuid == self.primary_tier_uuid:
# This is primary tier
# In case we're updating pool_size to a different value than
# default. Just update pool size for ceph's default pool 'rbd'
# as well
try:
self._configure_primary_tier_pool(
{'pool_name': constants.CEPH_POOL_RBD_NAME,
'pg_num': constants.CEPH_POOL_RBD_PG_NUM,
'pgp_num': constants.CEPH_POOL_RBD_PGP_NUM},
pool_size,
pool_min_size)
except exception.CephFailure:
pass
# Handle primary tier pools (cinder/glance/swift/ephemeral)
for pool in CEPH_POOLS:
# TODO(rchurch): The following is added for R3->R4 upgrades. Can we
# remove this for R5? Or is there some R3->R4->R5 need to keep this
# around.
try:
self.update_ceph_object_pool_name(pool)
except exception.CephFailure:
pass
self._configure_primary_tier_pool(pool, pool_size,
pool_min_size)
else:
try:
self._configure_secondary_tier_pools(t, pool_size,
pool_min_size)
except exception.CephPoolRulesetFailure as e:
LOG.info("Cannot add pools: %s" % e)
except exception.CephFailure as e:
LOG.info("Cannot add pools: %s" % e)
def _update_db_capabilities(self, bk, new_storceph):
# Avoid updating DB for all capabilities in new_storceph as we
# don't manage them. Let the callers deal with it.


@@ -283,15 +283,6 @@ class ConductorManager(service.PeriodicService):
self.dbapi.remotelogging_create(system_id_attribute_value)
self.dbapi.ptp_create(system_id_attribute_value)
# set default storage_backend
values.update({'backend': constants.SB_TYPE_FILE,
'name': constants.SB_DEFAULT_NAMES[constants.SB_TYPE_FILE],
'state': constants.SB_STATE_CONFIGURED,
'task': constants.SB_TASK_NONE,
'services': None,
'capabilities': {}})
self.dbapi.storage_backend_create(values)
# populate service table
for optional_service in constants.ALL_OPTIONAL_SERVICES:
self.dbapi.service_create({'name': optional_service,
@@ -4378,8 +4369,9 @@ class ConductorManager(service.PeriodicService):
if availability == constants.AVAILABILITY_AVAILABLE:
if imsg_dict.get(constants.SYSINV_AGENT_FIRST_REPORT):
# This should be run once after a boot
# This should be run once after a node boot
self._clear_ceph_stor_state(ihost_uuid)
cceph.fix_crushmap(self.dbapi)
config_uuid = imsg_dict['config_applied']
self._update_host_config_applied(context, ihost, config_uuid)
@@ -5215,39 +5207,6 @@ class ConductorManager(service.PeriodicService):
# Not sure yet what the proper response is here
pass
def configure_osd_istor(self, context, istor_obj):
"""Synchronously, have a conductor configure an OSD istor.
Does the following tasks:
- Allocates an OSD.
- Creates or resizes an OSD pool as necessary.
:param context: request context.
:param istor_obj: an istor object.
:returns: istor object, with updated osdid
"""
if istor_obj['osdid']:
LOG.error("OSD already assigned: %s", str(istor_obj['osdid']))
raise exception.SysinvException(_(
"Invalid method call: osdid already assigned: %s") %
str(istor_obj['osdid']))
# Create the OSD
response, body = self._ceph.osd_create(istor_obj['uuid'], body='json')
if not response.ok:
LOG.error("OSD create failed: %s", response.reason)
response.raise_for_status()
# Update the osdid in the stor object
istor_obj['osdid'] = body['output']['osdid']
# TODO(CephPoolsDecouple): remove
if not utils.is_kubernetes_config(self.dbapi):
self._ceph.configure_osd_pools()
return istor_obj
def restore_ceph_config(self, context, after_storage_enabled=False):
"""Restore Ceph configuration during Backup and Restore process.


@@ -47,6 +47,9 @@ class CephPuppet(openstack.OpenstackBasePuppet):
ceph_mon_ips = StorageBackendConfig.get_ceph_mon_ip_addresses(
self.dbapi)
if not ceph_mon_ips:
return {} # system configuration is not yet ready
controller_hosts = [constants.CONTROLLER_0_HOSTNAME, constants.CONTROLLER_1_HOSTNAME]
mon_2_host = [mon['hostname'] for mon in self.dbapi.ceph_mon_get_list() if
mon['hostname'] not in controller_hosts]