Merge "Make Ceph the default Storage Backend"

Zuul 2019-04-17 21:48:29 +00:00 committed by Gerrit Code Review
commit 5070f6491b
20 changed files with 241 additions and 249 deletions

View File

@ -5077,6 +5077,15 @@ class ConfigAssistant():
}
client.sysinv.route.create(**values)
def _populate_default_storage_backend(self, client, controller):
# Create the Ceph monitor for controller-0
values = {'ihost_uuid': controller.uuid}
client.sysinv.ceph_mon.create(**values)
# Create the Ceph default backend
values = {'confirmed': True}
client.sysinv.storage_ceph.create(**values)
def _populate_infrastructure_interface(self, client, controller):
"""Configure the infrastructure interface(s)"""
if not self.infrastructure_interface:
@ -5349,6 +5358,7 @@ class ConfigAssistant():
# ceph_mon config requires controller host to be created
self._inventory_config_complete_wait(client, controller)
self._populate_interface_config(client, controller)
self._populate_default_storage_backend(client, controller)
except (KeystoneFail, SysInvFail) as e:
LOG.exception(e)
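After config_controller completes, the effect of the new helper can be checked from the CLI. A minimal verification sketch, assuming the standard StarlingX 'system' client and the usual /etc/platform/openrc credentials file (both assumptions, not part of this change):

source /etc/platform/openrc        # assumed credentials path
system storage-backend-list        # expect a ceph backend in 'configured' state
system ceph-mon-list               # expect a monitor entry for controller-0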

View File

@ -19,8 +19,8 @@ import time
# constants in controllerconfig. When it is time to remove/deprecate these
# packages, classes OpenStack, Token and referenced constants need to be moved
# to this standalone script.
from controllerconfig import ConfigFail
from controllerconfig.common import constants
from controllerconfig import ConfigFail
from controllerconfig import openstack
from controllerconfig import sysinv_api as sysinv
@ -711,6 +711,19 @@ def inventory_config_complete_wait(client, controller):
wait_pv_config(client, controller)
def populate_default_storage_backend(client, controller):
if not INITIAL_POPULATION:
return
print("Populating ceph-mon config for controller-0...")
values = {'ihost_uuid': controller.uuid}
client.sysinv.ceph_mon.create(**values)
print("Populating ceph storage backend config...")
values = {'confirmed': True}
client.sysinv.storage_ceph.create(**values)
def handle_invalid_input():
raise Exception("Invalid input!\nUsage: <bootstrap-config-file> "
"[--system] [--network] [--service]")
@ -757,6 +770,7 @@ if __name__ == '__main__':
populate_docker_config(client)
controller = populate_controller_config(client)
inventory_config_complete_wait(client, controller)
populate_default_storage_backend(client, controller)
os.remove(config_file)
if INITIAL_POPULATION:
print("Successfully updated the initial system config.")

View File

@ -1,8 +0,0 @@
# Returns true if cinder ceph needs to be configured
Facter.add("is_initial_cinder_ceph_config") do
setcode do
conf_path = Facter::Core::Execution.exec("hiera --config /etc/puppet/hiera.yaml platform::params::config_path")
! File.exist?(conf_path +'.initial_cinder_ceph_config_complete')
end
end

View File

@ -1,8 +0,0 @@
# Returns true if this is the initial cinder config for this system
Facter.add("is_initial_cinder_config") do
setcode do
conf_path = Facter::Core::Execution.exec("hiera --config /etc/puppet/hiera.yaml platform::params::config_path")
! File.exist?(conf_path + '.initial_cinder_config_complete')
end
end

View File

@ -1,8 +0,0 @@
# Returns true if cinder lvm needs to be configured
Facter.add("is_initial_cinder_lvm_config") do
setcode do
conf_path = Facter::Core::Execution.exec("hiera --config /etc/puppet/hiera.yaml platform::params::config_path")
! File.exist?(conf_path + '.initial_cinder_lvm_config_complete')
end
end

View File

@ -1,7 +0,0 @@
# Returns true if cinder LVM needs to be configured on the current node
Facter.add("is_node_cinder_lvm_config") do
setcode do
! File.exist?('/etc/platform/.node_cinder_lvm_config_complete')
end
end
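All four removed facts followed the same pattern: resolve the platform config path from hiera, then test for a completion flag file. A shell sketch of the check they performed, using the flag name from the first fact above:

conf_path=$(hiera --config /etc/puppet/hiera.yaml platform::params::config_path)
if [ ! -f "${conf_path}.initial_cinder_ceph_config_complete" ]; then
    echo "initial cinder ceph config has not yet completed"
fi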

View File

@ -153,11 +153,11 @@ class platform::ceph::post
}
if $service_enabled {
# Ceph configuration on this node is done
file { $node_ceph_configured_flag:
ensure => present
}
}
}
@ -169,8 +169,8 @@ class platform::ceph::monitor
if $service_enabled {
if $system_type == 'All-in-one' and 'duplex' in $system_mode {
if str2bool($::is_controller_active) {
# Ceph mon is configured on a DRBD partition, on the active controller,
if str2bool($::is_standalone_controller) {
# Ceph mon is configured on a DRBD partition,
# when 'ceph' storage backend is added in sysinv.
# Then SM takes care of starting ceph after manifests are applied.
$configure_ceph_mon = true
@ -236,6 +236,31 @@ class platform::ceph::monitor
# ensure configuration is complete before creating monitors
Class['::ceph'] -> Ceph::Mon <| |>
# ensure we load the crushmap at first unlock
if $system_type == 'All-in-one' and str2bool($::is_standalone_controller) {
if 'duplex' in $system_mode {
$crushmap_txt = '/etc/sysinv/crushmap-controller-model.txt'
} else {
$crushmap_txt = '/etc/sysinv/crushmap-aio-sx.txt'
}
$crushmap_bin = '/etc/sysinv/crushmap.bin'
Ceph::Mon <| |>
-> exec { 'Compile crushmap':
command => "crushtool -c ${crushmap_txt} -o ${crushmap_bin}",
onlyif => "test ! -f ${crushmap_bin}",
logoutput => true,
}
-> exec { 'Set crushmap':
command => "ceph osd setcrushmap -i ${crushmap_bin}",
unless => 'ceph osd crush rule list --format plain | grep -e "storage_tier_ruleset"',
logoutput => true,
}
-> Platform_ceph_osd <| |>
}
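The exec chain above compiles the model crushmap once and loads it only while the storage tier ruleset is absent. The same sequence by hand, using the AIO-SX paths from this manifest (an illustrative sketch, not a supported procedure):

crushtool -c /etc/sysinv/crushmap-aio-sx.txt -o /etc/sysinv/crushmap.bin
ceph osd crush rule list --format plain | grep -q storage_tier_ruleset ||
    ceph osd setcrushmap -i /etc/sysinv/crushmap.bin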
# Ensure networking is up before Monitors are configured
Anchor['platform::networking'] -> Ceph::Mon <| |>
# default configuration for all ceph monitor resources
Ceph::Mon {
fsid => $cluster_uuid,
@ -248,33 +273,10 @@ class platform::ceph::monitor
public_addr => $floating_mon_ip,
}
if (str2bool($::is_controller_active) and
str2bool($::is_initial_cinder_ceph_config) and
!str2bool($::is_standalone_controller)) {
# On AIO-DX there is a single, floating, Ceph monitor backed by DRBD.
# Therefore DRBD must be up before Ceph monitor is configured
Drbd::Resource <| |> -> Ceph::Mon <| |>
# When we configure ceph after both controllers are active,
# we need to stop the monitor, unmount the monitor partition
# and set the drbd role to secondary, so that the handoff to
# SM is done properly once we swact to the standby controller.
# TODO: Remove this once SM supports in-service config reload.
Ceph::Mon <| |>
-> exec { 'Stop Ceph monitor':
command =>'/etc/init.d/ceph stop mon',
onlyif => '/etc/init.d/ceph status mon',
logoutput => true,
}
-> exec { 'umount ceph-mon partition':
command => "umount ${mon_mountpoint}",
onlyif => "mount | grep -q ${mon_mountpoint}",
logoutput => true,
}
-> exec { 'Set cephmon secondary':
command => 'drbdadm secondary drbd-cephmon',
unless => "drbdadm role drbd-cephmon | egrep '^Secondary'",
logoutput => true,
}
}
} else {
if $::hostname == $mon_0_host {
ceph::mon { $mon_0_host:
@ -295,8 +297,7 @@ class platform::ceph::monitor
}
}
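For reference, the handoff sequence removed above amounts to the following shell steps: stop the monitor, unmount its DRBD-backed partition, then demote the DRBD resource so SM can complete the swact. The mountpoint value is an assumption here, since ${mon_mountpoint} is resolved elsewhere:

/etc/init.d/ceph stop mon           # stop the monitor on this controller
umount /var/lib/ceph/mon            # assumed value of ${mon_mountpoint}
drbdadm secondary drbd-cephmon      # demote so SM owns the handoff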
define platform_ceph_osd(
define osd_crush_location(
$osd_id,
$osd_uuid,
$disk_path,
@ -311,12 +312,28 @@ define platform_ceph_osd(
"osd.${$osd_id}/crush_location": value => "root=${tier_name}-tier host=${$::platform::params::hostname}-${$tier_name}";
}
}
file { "/var/lib/ceph/osd/ceph-${osd_id}":
}
define platform_ceph_osd(
$osd_id,
$osd_uuid,
$disk_path,
$data_path,
$journal_path,
$tier_name,
) {
Anchor['platform::networking'] # Make sure networking is up before running ceph commands
-> file { "/var/lib/ceph/osd/ceph-${osd_id}":
ensure => 'directory',
owner => 'root',
group => 'root',
mode => '0755',
}
-> exec { "ceph osd create ${osd_uuid} ${osd_id}":
logoutput => true,
command => template('platform/ceph.osd.create.erb'),
}
-> ceph::osd { $disk_path:
uuid => $osd_uuid,
}
@ -351,8 +368,13 @@ class platform::ceph::osds(
mode => '0755',
}
# Ensure ceph.conf is complete before configuring OSDs
Class['::ceph'] -> Platform_ceph_osd <| |>
# Journal disks need to be prepared before the OSDs are configured
Platform_ceph_journal <| |> -> Platform_ceph_osd <| |>
# Crush locations in ceph.conf need to be set before the OSDs are configured
Osd_crush_location <| |> -> Platform_ceph_osd <| |>
# default configuration for all ceph object resources
Ceph::Osd {
@ -360,6 +382,7 @@ class platform::ceph::osds(
cluster_uuid => $cluster_uuid,
}
create_resources('osd_crush_location', $osd_config)
create_resources('platform_ceph_osd', $osd_config)
create_resources('platform_ceph_journal', $journal_config)
}
@ -479,6 +502,7 @@ class platform::ceph::runtime_base {
class platform::ceph::runtime_osds {
include ::ceph::params
include ::platform::ceph
include ::platform::ceph::osds
# Since this is runtime we have to avoid checking status of Ceph while we

View File

@ -445,16 +445,13 @@ class platform::drbd::cephmon ()
$system_mode = $::platform::params::system_mode
$system_type = $::platform::params::system_type
#TODO: This will change once we remove the native cinder service
if (str2bool($::is_initial_config_primary) or
(str2bool($::is_controller_active) and str2bool($::is_initial_cinder_ceph_config))
){
if str2bool($::is_standalone_controller) and ! str2bool($::is_node_ceph_configured) {
# Active controller, first time configuration.
$drbd_primary = true
$drbd_initial = true
$drbd_automount = true
} elsif str2bool($::is_standalone_controller){
} elsif str2bool($::is_standalone_controller) {
# Active standalone controller, successive reboots.
$drbd_primary = true
$drbd_initial = undef
@ -490,9 +487,9 @@ class platform::drbd(
$service_enable = false,
$service_ensure = 'stopped',
) {
if (str2bool($::is_initial_config_primary)
if (str2bool($::is_initial_config_primary) or str2bool($::is_standalone_controller)
){
# Enable DRBD at config_controller
# Enable DRBD on standalone
class { '::drbd':
service_enable => true,
service_ensure => 'running',
@ -553,41 +550,55 @@ class platform::drbd::runtime {
}
}
class platform::drbd::runtime_service_enable {
class { '::drbd':
service_enable => true,
service_ensure => 'running'
}
}
class platform::drbd::pgsql::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::pgsql
}
class platform::drbd::cgcs::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::cgcs
}
class platform::drbd::extension::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::extension
}
class platform::drbd::patch_vault::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::patch_vault
}
class platform::drbd::etcd::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::etcd
}
class platform::drbd::dockerdistribution::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::dockerdistribution
}
class platform::drbd::cephmon::runtime {
include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::cephmon
}

View File

@ -0,0 +1,52 @@
/bin/true # puppet requires this for correct template parsing
# This is needed to pin a specific OSD id to a corresponding UUID.
# The problem is that 'ceph-disk prepare' does not accept an OSD id as a
# CLI parameter, so an OSD with the desired id and UUID must already
# exist before the puppet ceph module runs 'ceph-disk prepare'.
set -x
osd_id=<%= @osd_id %>
osd_uuid=<%= @osd_uuid %>
# Ignore if Ceph is down; this case should only happen on a DOR (Dead Office Recovery)
timeout 10 ceph -s
if [ $? -ne 0 ]; then
exit 0
fi
# Check if OSD exists and has the correct UUID
osds=( $(ceph osd ls) )
if [[ " ${osds[@]} " =~ " ${osd_id} " ]]; then
# Get the UUID. 'ceph osd dump' is slower than 'osd ls' as it also lists
# PGs with problems, but it is the only way to get the UUID of an OSD.
found_uuid=$(ceph osd dump | grep "^osd.${osd_id} " | awk '{print $NF}')
if [ "${found_uuid}" != "${osd_uuid}" ]; then
# At B&R ceph's crushmap is restored but, although OSDs are properly
# allocated to their hosts in the tree, crushmap does not store
# OSD UUIDs. Therefore, w/o osd_id and uuid match, when the OSD is
# prepared there is a chance that ceph-disk will create a new OSD
# that will no longer match the osd id in sysinv db. So, we have
# to remove OSDs that don't match UUIDs and recreate them with
# expected OSD ID and UUID so that ceph-disk does not get confused.
ceph osd rm ${osd_id}
RET=$?
if [ $RET -ne 0 ]; then
echo "Error removing osd ${osd_id}, exit code: ${RET}"
exit $RET
fi
else
# OSD exists and has the correct uuid
exit 0
fi
fi
# Create the OSD with desired id and uuid
ceph osd create ${osd_uuid} ${osd_id}
RET=$?
if [ $RET -ne 0 ]; then
echo "Error creating osd ${osd_id}, exit code: ${RET}"
exit $RET
fi
set +x

View File

@ -11,8 +11,6 @@ from cgtsclient.common import base
from cgtsclient.common import constants
from cgtsclient.common import utils
from cgtsclient import exc
from cgtsclient.v1 import ceph_mon as ceph_mon_utils
from cgtsclient.v1 import ihost as ihost_utils
from cgtsclient.v1 import storage_ceph # noqa
from cgtsclient.v1 import storage_ceph_external # noqa
from cgtsclient.v1 import storage_external # noqa
@ -138,23 +136,6 @@ def _display_next_steps():
def backend_add(cc, backend, args):
backend = backend.replace('-', '_')
# add ceph mons to controllers
if backend == constants.SB_TYPE_CEPH:
# Controllers should always have monitors.
# Not finding a controller means it's not yet configured,
# so move forward.
try:
ihost = ihost_utils._find_ihost(cc, constants.CONTROLLER_0_HOSTNAME)
ceph_mon_utils.ceph_mon_add(cc, args, ihost.uuid)
except exc.CommandError:
pass
try:
ihost = ihost_utils._find_ihost(cc, constants.CONTROLLER_1_HOSTNAME)
ceph_mon_utils.ceph_mon_add(cc, args, ihost.uuid)
except exc.CommandError:
pass
# allowed storage_backend fields
allowed_fields = ['name', 'services', 'confirmed', 'ceph_conf']
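With the client-side monitor creation removed, adding a ceph backend from the CLI reduces to the confirmed add; the controller-0 monitor is now created during bootstrap instead. A hedged example of the resulting flow:

system storage-backend-add ceph --confirmed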

View File

@ -512,6 +512,10 @@ class AgentManager(service.PeriodicService):
msg_dict.update({constants.HOST_ACTION_STATE:
constants.HAS_REINSTALLED})
# Is this the first time since boot we are reporting to conductor?
msg_dict.update({constants.SYSINV_AGENT_FIRST_REPORT:
not os.path.exists(SYSINV_FIRST_REPORT_FLAG)})
try:
rpcapi.iplatform_update_by_ihost(context,
host_uuid,
@ -916,10 +920,6 @@ class AgentManager(service.PeriodicService):
if iscsi_initiator_name is not None:
imsg_dict.update({'iscsi_initiator_name': iscsi_initiator_name})
# Is this the first time since boot we are reporting to conductor?
imsg_dict.update({constants.SYSINV_AGENT_FIRST_REPORT:
not os.path.exists(SYSINV_FIRST_REPORT_FLAG)})
self.platform_update_by_host(rpcapi,
icontext,
self._ihost_uuid,

View File

@ -446,9 +446,10 @@ def _create(ceph_mon):
"replication is set to: %s'. Please update replication "
"before configuring a monitor on a worker node." % supported_replication))
# host must be locked and online
if (chost['availability'] != constants.AVAILABILITY_ONLINE or
chost['administrative'] != constants.ADMIN_LOCKED):
# host must be locked and online unless this is controller-0
if (chost['hostname'] != constants.CONTROLLER_0_HOSTNAME and
(chost['availability'] != constants.AVAILABILITY_ONLINE or
chost['administrative'] != constants.ADMIN_LOCKED)):
raise wsme.exc.ClientSideError(
_("Host %s must be locked and online." % chost['hostname']))

View File

@ -5295,7 +5295,7 @@ class HostController(rest.RestController):
pass
elif StorageBackendConfig.has_backend_configured(
pecan.request.dbapi,
constants.CINDER_BACKEND_CEPH):
constants.SB_TYPE_CEPH):
if utils.is_aio_simplex_system(pecan.request.dbapi):
# Check if host has enough OSDs configured for each tier
tiers = pecan.request.dbapi.storage_tier_get_all()

View File

@ -471,23 +471,33 @@ class StorageController(rest.RestController):
except Exception as e:
LOG.exception(e)
raise
# Make sure that we are allowed to delete
_check_host(stor)
# Delete the stor if supported
ihost_id = stor['forihostid']
ihost = pecan.request.dbapi.ihost_get(ihost_id)
if stor.function == constants.STOR_FUNCTION_JOURNAL:
# Host must be locked
ihost_id = stor['forihostid']
ihost = pecan.request.dbapi.ihost_get(ihost_id)
if ihost['administrative'] != constants.ADMIN_LOCKED:
raise wsme.exc.ClientSideError(_("Host %s must be locked." %
ihost['hostname']))
self.delete_stor(stor_uuid)
elif (stor.function == constants.STOR_FUNCTION_OSD and
stor.state == constants.SB_STATE_CONFIGURING_ON_UNLOCK):
# Host must be locked
if ihost['administrative'] != constants.ADMIN_LOCKED:
raise wsme.exc.ClientSideError(_("Host %s must be locked." %
ihost['hostname']))
self.delete_stor(stor_uuid)
else:
raise wsme.exc.ClientSideError(_(
"Deleting a Storage Function other than %s is not "
"supported on this setup") % constants.STOR_FUNCTION_JOURNAL)
"Deleting a Storage Function other than '%s' and '%s' in "
"state '%s' is not supported on this setup.") %
(constants.STOR_FUNCTION_JOURNAL,
constants.STOR_FUNCTION_OSD,
constants.SB_STATE_CONFIGURING_ON_UNLOCK))
def delete_stor(self, stor_uuid):
"""Delete a stor"""
@ -497,10 +507,10 @@ class StorageController(rest.RestController):
try:
# The conductor will handle removing the stor; not all functions
# need special handling
if stor.function == constants.STOR_FUNCTION_OSD:
pecan.request.rpcapi.unconfigure_osd_istor(pecan.request.context,
stor)
elif stor.function == constants.STOR_FUNCTION_JOURNAL:
# if stor.function == constants.STOR_FUNCTION_OSD:
# pecan.request.rpcapi.unconfigure_osd_istor(pecan.request.context,
# stor)
if stor.function == constants.STOR_FUNCTION_JOURNAL:
pecan.request.dbapi.istor_disable_journal(stor_uuid)
# Now remove the stor from DB
pecan.request.dbapi.istor_remove_disk_association(stor_uuid)
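Deletion is now permitted for journal stors and for OSDs still pending configuration at unlock, in both cases only while the host is locked. From the CLI this remains the ordinary stor delete (the uuid below is a placeholder):

system host-stor-delete <stor-uuid>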
@ -901,29 +911,21 @@ def _create(stor, iprofile=None):
"Invalid stor device type: only SSD and NVME devices are supported"
" for journal functions."))
if osd_create is True:
# Get the next free OSD ID in the system
stors = pecan.request.dbapi.istor_get_list(sort_key='osdid', sort_dir='asc')
stors_ids = [s['osdid'] for s in stors if s['osdid'] is not None]
if stors_ids:
candidate_ids = [i for i in range(0, stors_ids[-1] + 2) if i not in stors_ids]
create_attrs['osdid'] = candidate_ids[0]
else:
create_attrs['osdid'] = 0
else:
create_attrs['osdid'] = None
new_stor = pecan.request.dbapi.istor_create(forihostid,
create_attrs)
# Create an osd associated with disk.
if osd_create is True:
try:
new_stor = pecan.request.rpcapi.configure_osd_istor(
pecan.request.context, new_stor)
except Exception as cpe:
LOG.exception(cpe)
# Delete the partially configured istor
pecan.request.dbapi.istor_destroy(new_stor.uuid)
raise wsme.exc.ClientSideError(_(
"Internal error: failed to create a storage object. "
"Make sure storage cluster is up and healthy."))
if iprofile:
new_stor = pecan.request.dbapi.istor_update(new_stor.uuid,
{'osdid': None})
else:
# Update the database record
new_stor.save(pecan.request.context)
# Associate the disk with the db record
values = {'foristorid': new_stor.id}
pecan.request.dbapi.idisk_update(idisk_uuid,

View File

@ -339,7 +339,6 @@ class StorageCephController(rest.RestController):
@wsme_pecan.wsexpose(None, types.uuid, status_code=204)
def delete(self, storageceph_uuid):
"""Delete a backend."""
return _delete(storageceph_uuid)
@ -696,14 +695,8 @@ def _apply_backend_changes(op, sb_obj):
services = api_helper.getListFromServices(sb_obj.as_dict())
if op == constants.SB_API_OP_CREATE:
if sb_obj.name == constants.SB_DEFAULT_NAMES[
if sb_obj.name != constants.SB_DEFAULT_NAMES[
constants.SB_TYPE_CEPH]:
# Apply manifests for primary tier
pecan.request.rpcapi.update_ceph_config(pecan.request.context,
sb_obj.uuid,
services)
else:
# Enable the service(s) use of the backend
if constants.SB_SVC_CINDER in services:
pecan.request.rpcapi.update_ceph_services(
@ -800,10 +793,21 @@ def _set_defaults(storage_ceph):
'kube_pool_gib': None,
'object_gateway': False,
}
sc = api_helper.set_backend_data(storage_ceph,
defaults,
CAPABILITIES,
constants.SB_CEPH_SVCS_SUPPORTED)
# Ceph is our default storage backend and is added at configuration;
# set state and task accordingly.
if sc['name'] == constants.SB_DEFAULT_NAMES[constants.SB_TYPE_CEPH]:
sc['state'] = constants.SB_STATE_CONFIGURED
if utils.is_aio_simplex_system(pecan.request.dbapi):
sc['task'] = None
else:
sc['task'] = constants.SB_TASK_RECONFIG_CONTROLLER
return sc

View File

@ -24,6 +24,8 @@ import pecan
import os
import requests
from sysinv.api.controllers.v1.utils import is_aio_system
LOG = logging.getLogger(__name__)
@ -704,11 +706,36 @@ class CephApiOperator(object):
def fix_crushmap(dbapi=None):
""" Set Ceph's CRUSH Map based on storage model """
def _create_crushmap_flag_file():
try:
open(crushmap_flag_file, "w").close()
except IOError as e:
LOG.warn(_('Failed to create flag file: {}. '
'Reason: {}').format(crushmap_flag_file, e))
if not dbapi:
dbapi = pecan.request.dbapi
crushmap_flag_file = os.path.join(constants.SYSINV_CONFIG_PATH,
constants.CEPH_CRUSH_MAP_APPLIED)
if not os.path.isfile(crushmap_flag_file):
_operator = CephApiOperator()
if not is_aio_system(dbapi):
# At least two monitors have to be running on a standard deployment;
# otherwise, don't even try to load the crushmap.
active_mons, required_mons, __ = _operator.get_monitors_status(dbapi)
if required_mons > active_mons:
LOG.info("Not enough monitors yet available to fix crushmap.")
return False
# The crushmap may already be loaded through puppet; avoid doing it twice.
default_ceph_tier_name = constants.SB_TIER_DEFAULT_NAMES[
constants.SB_TIER_TYPE_CEPH] + constants.CEPH_CRUSH_TIER_SUFFIX
rule_is_present, __, __ = _operator._crush_rule_status(default_ceph_tier_name)
if rule_is_present:
_create_crushmap_flag_file()
return False
stor_model = get_ceph_storage_model(dbapi)
if stor_model == constants.CEPH_AIO_SX_MODEL:
crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt"
@ -731,13 +758,10 @@ def fix_crushmap(dbapi=None):
# May not be critical, depends on where this is called.
reason = "Error: %s Output: %s" % (str(e), e.output)
raise exception.CephCrushMapNotApplied(reason=reason)
try:
open(crushmap_flag_file, "w").close()
except IOError as e:
LOG.warn(_('Failed to create flag file: {}. '
'Reason: {}').format(crushmap_flag_file, e))
_create_crushmap_flag_file()
return True
return False
def get_ceph_storage_model(dbapi=None):

View File

@ -220,6 +220,7 @@ class StorageBackendConfig(object):
}
ceph_mons = dbapi.ceph_mon_get_list()
ceph_mon = None
for ceph_mon in ceph_mons:
if ceph_mon['hostname'] == constants.CONTROLLER_0_HOSTNAME:
targets.update({'%s-%s' % (constants.CONTROLLER_0_HOSTNAME,
@ -231,8 +232,6 @@ class StorageBackendConfig(object):
targets.update({'%s-%s' % (ceph_mon['hostname'],
network_type): 'ceph-mon-2-ip'})
ceph_mon['ceph_mon_gib'] = ceph_mons[0]['ceph_mon_gib']
results = {}
addrs = dbapi.addresses_get_all()
for addr in addrs:

View File

@ -124,7 +124,6 @@ class CephOperator(object):
# cluster UUID value that is valid and consistent for the state of the
# installation. Also make sure that we have a cluster DB entry
# established
LOG.debug("_init_db_cluster_and_tier: Reteiving cluster record")
try:
self._db_cluster = self._db_api.clusters_get_all(
type=constants.CINDER_BACKEND_CEPH)[0]
@ -132,7 +131,7 @@ class CephOperator(object):
# Retrieve ceph cluster fsid and update database
fsid = self._get_fsid()
if uuidutils.is_uuid_like(fsid):
LOG.debug("Update cluster record: fsid=%s." % fsid)
LOG.info("Update cluster record: fsid=%s." % fsid)
self._db_cluster.cluster_uuid = fsid
self._db_api.cluster_update(
self.cluster_db_uuid,
@ -155,7 +154,7 @@ class CephOperator(object):
# Try to use ceph cluster fsid
fsid = self._get_fsid()
LOG.info("Create new cluster record: fsid=%s." % fsid)
LOG.info("Create new ceph cluster record: fsid=%s." % fsid)
# Create the default primary cluster
self._db_cluster = self._db_api.cluster_create(
{'uuid': fsid if uuidutils.is_uuid_like(fsid) else str(uuid.uuid4()),
@ -165,6 +164,7 @@ class CephOperator(object):
'system_id': isystem.id})
# Create the default primary ceph storage tier
LOG.info("Create primary ceph tier record.")
self._db_primary_tier = self._db_api.storage_tier_create(
{'forclusterid': self.cluster_id,
'name': constants.SB_TIER_DEFAULT_NAMES[constants.SB_TIER_TYPE_CEPH],
@ -831,67 +831,6 @@ class CephOperator(object):
name=rule_name, reason=body['status'])
raise e
# TODO(CephPoolsDecouple): remove
def configure_osd_pools(self, ceph_backend=None, new_pool_size=None, new_pool_min_size=None):
"""Create or resize all of the osd pools as needed
the ceph backend could be a secondary backend that is in the configuring state
"""
# Handle pools for multiple tiers
tiers = self._db_api.storage_tier_get_by_cluster(self.cluster_db_uuid)
ceph_tiers = [t for t in tiers if t.type == constants.SB_TIER_TYPE_CEPH]
ceph_backends = self._db_api.storage_ceph_get_list()
for t in ceph_tiers:
# Get corresponding ceph backend for the tier, if any
bk = None
for bk in ceph_backends:
if t.forbackendid == bk.id:
break
# Get pool replication parameters
pool_size, pool_min_size = StorageBackendConfig.get_ceph_pool_replication(self._db_api, bk)
if bk and ceph_backend and bk.name == ceph_backend.name:
# Override replication
pool_size = new_pool_size if new_pool_size else pool_size
pool_min_size = new_pool_min_size if new_pool_min_size else pool_min_size
# Configure tier OSD pools
if t.uuid == self.primary_tier_uuid:
# This is the primary tier.
# If pool_size is being updated to a non-default value, update the
# pool size for ceph's default pool 'rbd' as well.
try:
self._configure_primary_tier_pool(
{'pool_name': constants.CEPH_POOL_RBD_NAME,
'pg_num': constants.CEPH_POOL_RBD_PG_NUM,
'pgp_num': constants.CEPH_POOL_RBD_PGP_NUM},
pool_size,
pool_min_size)
except exception.CephFailure:
pass
# Handle primary tier pools (cinder/glance/swift/ephemeral)
for pool in CEPH_POOLS:
# TODO(rchurch): The following is added for R3->R4 upgrades. Can we
# remove this for R5? Or is there some R3->R4->R5 need to keep this
# around.
try:
self.update_ceph_object_pool_name(pool)
except exception.CephFailure:
pass
self._configure_primary_tier_pool(pool, pool_size,
pool_min_size)
else:
try:
self._configure_secondary_tier_pools(t, pool_size,
pool_min_size)
except exception.CephPoolRulesetFailure as e:
LOG.info("Cannot add pools: %s" % e)
except exception.CephFailure as e:
LOG.info("Cannot add pools: %s" % e)
def _update_db_capabilities(self, bk, new_storceph):
# Avoid updating the DB for all capabilities in new_storceph as we
# don't manage them. Let the callers deal with it.

View File

@ -283,15 +283,6 @@ class ConductorManager(service.PeriodicService):
self.dbapi.remotelogging_create(system_id_attribute_value)
self.dbapi.ptp_create(system_id_attribute_value)
# set default storage_backend
values.update({'backend': constants.SB_TYPE_FILE,
'name': constants.SB_DEFAULT_NAMES[constants.SB_TYPE_FILE],
'state': constants.SB_STATE_CONFIGURED,
'task': constants.SB_TASK_NONE,
'services': None,
'capabilities': {}})
self.dbapi.storage_backend_create(values)
# populate service table
for optional_service in constants.ALL_OPTIONAL_SERVICES:
self.dbapi.service_create({'name': optional_service,
@ -4378,8 +4369,9 @@ class ConductorManager(service.PeriodicService):
if availability == constants.AVAILABILITY_AVAILABLE:
if imsg_dict.get(constants.SYSINV_AGENT_FIRST_REPORT):
# This should be run once after a boot
# This should be run once after a node boot
self._clear_ceph_stor_state(ihost_uuid)
cceph.fix_crushmap(self.dbapi)
config_uuid = imsg_dict['config_applied']
self._update_host_config_applied(context, ihost, config_uuid)
@ -5215,39 +5207,6 @@ class ConductorManager(service.PeriodicService):
# Not sure yet what the proper response is here
pass
def configure_osd_istor(self, context, istor_obj):
"""Synchronously, have a conductor configure an OSD istor.
Does the following tasks:
- Allocates an OSD.
- Creates or resizes an OSD pool as necessary.
:param context: request context.
:param istor_obj: an istor object.
:returns: istor object, with updated osdid
"""
if istor_obj['osdid']:
LOG.error("OSD already assigned: %s", str(istor_obj['osdid']))
raise exception.SysinvException(_(
"Invalid method call: osdid already assigned: %s") %
str(istor_obj['osdid']))
# Create the OSD
response, body = self._ceph.osd_create(istor_obj['uuid'], body='json')
if not response.ok:
LOG.error("OSD create failed: %s", response.reason)
response.raise_for_status()
# Update the osdid in the stor object
istor_obj['osdid'] = body['output']['osdid']
# TODO(CephPoolsDecouple): remove
if not utils.is_kubernetes_config(self.dbapi):
self._ceph.configure_osd_pools()
return istor_obj
def restore_ceph_config(self, context, after_storage_enabled=False):
"""Restore Ceph configuration during Backup and Restore process.

View File

@ -47,6 +47,9 @@ class CephPuppet(openstack.OpenstackBasePuppet):
ceph_mon_ips = StorageBackendConfig.get_ceph_mon_ip_addresses(
self.dbapi)
if not ceph_mon_ips:
return {} # system configuration is not yet ready
controller_hosts = [constants.CONTROLLER_0_HOSTNAME, constants.CONTROLLER_1_HOSTNAME]
mon_2_host = [mon['hostname'] for mon in self.dbapi.ceph_mon_get_list() if
mon['hostname'] not in controller_hosts]