Merge "Make Ceph the default Storage Backend"

2019-04-17 21:48:29 +00:00 · 2019-04-17 21:48:29 +00:00 · 5070f6491b
parent 3cd87d03f6 fcf3fbf5f4
commit 5070f6491b
20 changed files with 241 additions and 249 deletions
--- a/controllerconfig/controllerconfig/controllerconfig/configassistant.py
+++ b/controllerconfig/controllerconfig/controllerconfig/configassistant.py
@ -5077,6 +5077,15 @@ class ConfigAssistant():
            }
            client.sysinv.route.create(**values)
    def _populate_default_storage_backend(self, client, controller):
        """Create the default Ceph storage backend through sysinv.

        A Ceph monitor is created for the given controller (controller-0
        during initial configuration) and then the Ceph storage backend is
        created with the 'confirmed' flag set.

        :param client: client wrapper exposing the ``sysinv`` API client.
        :param controller: host record whose ``uuid`` identifies the host
                           that receives the Ceph monitor.
        """
        # Ceph monitor for controller-0
        monitor_attrs = {'ihost_uuid': controller.uuid}
        client.sysinv.ceph_mon.create(**monitor_attrs)

        # Default Ceph backend
        backend_attrs = {'confirmed': True}
        client.sysinv.storage_ceph.create(**backend_attrs)
    def _populate_infrastructure_interface(self, client, controller):
        """Configure the infrastructure interface(s)"""
        if not self.infrastructure_interface:
@ -5349,6 +5358,7 @@ class ConfigAssistant():
                # ceph_mon config requires controller host to be created
                self._inventory_config_complete_wait(client, controller)
                self._populate_interface_config(client, controller)
                self._populate_default_storage_backend(client, controller)
        except (KeystoneFail, SysInvFail) as e:
            LOG.exception(e)
--- a/playbookconfig/playbookconfig/playbooks/bootstrap/roles/persist-config/files/populate_initial_config.py
+++ b/playbookconfig/playbookconfig/playbooks/bootstrap/roles/persist-config/files/populate_initial_config.py
@ -19,8 +19,8 @@ import time
 # constants in controllerconfig. When it is time to remove/deprecate these
 # packages, classes OpenStack, Token and referenced constants need to be moved
 # to this standalone script.
 from controllerconfig import ConfigFail
 from controllerconfig.common import constants
 from controllerconfig import ConfigFail
 from controllerconfig import openstack
 from controllerconfig import sysinv_api as sysinv
@ -711,6 +711,19 @@ def inventory_config_complete_wait(client, controller):
    wait_pv_config(client, controller)
 def populate_default_storage_backend(client, controller):
    """Create the Ceph monitor and the Ceph storage backend via sysinv.

    Nothing is done unless the module-level INITIAL_POPULATION flag is set.

    :param client: client wrapper exposing the ``sysinv`` API client.
    :param controller: host record whose ``uuid`` identifies the host that
                       receives the Ceph monitor.
    """
    if INITIAL_POPULATION:
        print("Populating ceph-mon config for controller-0...")
        monitor_attrs = {'ihost_uuid': controller.uuid}
        client.sysinv.ceph_mon.create(**monitor_attrs)

        print("Populating ceph storage backend config...")
        backend_attrs = {'confirmed': True}
        client.sysinv.storage_ceph.create(**backend_attrs)
 def handle_invalid_input():
    """Abort with a usage message describing the expected arguments.

    :raises Exception: always, carrying the usage text.
    """
    usage = ("Invalid input!\nUsage: <bootstrap-config-file> "
             "[--system] [--network] [--service]")
    raise Exception(usage)
@ -757,6 +770,7 @@ if __name__ == '__main__':
            populate_docker_config(client)
            controller = populate_controller_config(client)
            inventory_config_complete_wait(client, controller)
            populate_default_storage_backend(client, controller)
            os.remove(config_file)
            if INITIAL_POPULATION:
                print("Successfully updated the initial system config.")
--- a/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_ceph_config.rb
+++ b/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_ceph_config.rb
@ -1,8 +0,0 @@
 # Fact is_initial_cinder_ceph_config: true while cinder ceph still needs to
 # be configured, i.e. while the '.initial_cinder_ceph_config_complete' flag
 # file is absent from the platform config path reported by hiera.
 Facter.add("is_initial_cinder_ceph_config") do
  setcode do
    hiera_lookup = "hiera --config /etc/puppet/hiera.yaml platform::params::config_path"
    flag_file = Facter::Core::Execution.exec(hiera_lookup) + '.initial_cinder_ceph_config_complete'
    !File.exist?(flag_file)
  end
 end
--- a/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_config.rb
+++ b/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_config.rb
@ -1,8 +0,0 @@
 # Fact is_initial_cinder_config: true if this is the initial cinder config
 # for this system, i.e. while the '.initial_cinder_config_complete' flag file
 # is absent from the platform config path reported by hiera.
 Facter.add("is_initial_cinder_config") do
  setcode do
    hiera_lookup = "hiera --config /etc/puppet/hiera.yaml platform::params::config_path"
    flag_file = Facter::Core::Execution.exec(hiera_lookup) + '.initial_cinder_config_complete'
    !File.exist?(flag_file)
  end
 end
--- a/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_lvm_config.rb
+++ b/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_lvm_config.rb
@ -1,8 +0,0 @@
 # Fact is_initial_cinder_lvm_config: true while cinder lvm still needs to be
 # configured, i.e. while the '.initial_cinder_lvm_config_complete' flag file
 # is absent from the platform config path reported by hiera.
 Facter.add("is_initial_cinder_lvm_config") do
  setcode do
    hiera_lookup = "hiera --config /etc/puppet/hiera.yaml platform::params::config_path"
    flag_file = Facter::Core::Execution.exec(hiera_lookup) + '.initial_cinder_lvm_config_complete'
    !File.exist?(flag_file)
  end
 end
--- a/puppet-manifests/src/modules/platform/lib/facter/is_node_cinder_lvm_config.rb
+++ b/puppet-manifests/src/modules/platform/lib/facter/is_node_cinder_lvm_config.rb
@ -1,7 +0,0 @@
 # Fact is_node_cinder_lvm_config: true while cinder LVM still needs to be
 # configured on the current node, i.e. while the per-node completion flag
 # file under /etc/platform does not exist.
 Facter.add("is_node_cinder_lvm_config") do
  setcode do
    flag_file = '/etc/platform/.node_cinder_lvm_config_complete'
    !File.exist?(flag_file)
  end
 end
--- a/puppet-manifests/src/modules/platform/manifests/ceph.pp
+++ b/puppet-manifests/src/modules/platform/manifests/ceph.pp
@ -153,11 +153,11 @@ class platform::ceph::post
  }
  if $service_enabled {
    # Ceph configuration on this node is done
    file { $node_ceph_configured_flag:
      ensure => present
    }
  }
 }
@ -169,8 +169,8 @@ class platform::ceph::monitor
  if $service_enabled {
    if $system_type == 'All-in-one' and 'duplex' in $system_mode {
-      if str2bool($::is_controller_active) {
+      if str2bool($::is_standalone_controller) {
-        # Ceph mon is configured on a DRBD partition, on the active controller,
+        # Ceph mon is configured on a DRBD partition,
        # when 'ceph' storage backend is added in sysinv.
        # Then SM takes care of starting ceph after manifests are applied.
        $configure_ceph_mon = true
@ -236,6 +236,31 @@ class platform::ceph::monitor
    # ensure configuration is complete before creating monitors
    Class['::ceph'] -> Ceph::Mon <| |>
    # ensure we load the crushmap at first unlock
    if $system_type == 'All-in-one' and str2bool($::is_standalone_controller) {
      if 'duplex' in $system_mode {
        $crushmap_txt = '/etc/sysinv/crushmap-controller-model.txt'
      } else {
        $crushmap_txt = '/etc/sysinv/crushmap-aio-sx.txt'
      }
      $crushmap_bin = '/etc/sysinv/crushmap.bin'
      Ceph::Mon <| |>
      -> exec { 'Compile crushmap':
        command   => "crushtool -c ${crushmap_txt} -o ${crushmap_bin}",
        onlyif    => "test ! -f ${crushmap_bin}",
        logoutput => true,
      }
      -> exec { 'Set crushmap':
        command   => "ceph osd setcrushmap -i ${crushmap_bin}",
        unless    => 'ceph osd crush rule list --format plain | grep -e "storage_tier_ruleset"',
        logoutput => true,
      }
      -> Platform_ceph_osd <| |>
    }
    # Ensure networking is up before Monitors are configured
    Anchor['platform::networking'] -> Ceph::Mon <| |>
    # default configuration for all ceph monitor resources
    Ceph::Mon {
      fsid => $cluster_uuid,
@ -248,33 +273,10 @@ class platform::ceph::monitor
        public_addr => $floating_mon_ip,
      }
-      if (str2bool($::is_controller_active) and
+      # On AIO-DX there is a single, floating, Ceph monitor backed by DRBD.
-          str2bool($::is_initial_cinder_ceph_config) and
+      # Therefore DRBD must be up before Ceph monitor is configured
-          !str2bool($::is_standalone_controller)) {
+      Drbd::Resource <| |> -> Ceph::Mon <| |>
        # When we configure ceph after both controllers are active,
        # we need to stop the monitor, unmount the monitor partition
        # and set the drbd role to secondary, so that the handoff to
        # SM is done properly once we swact to the standby controller.
        # TODO: Remove this once SM supports in-service config reload.
        Ceph::Mon <| |>
        -> exec { 'Stop Ceph monitor':
          command   =>'/etc/init.d/ceph stop mon',
          onlyif    => '/etc/init.d/ceph status mon',
          logoutput => true,
        }
        -> exec { 'umount ceph-mon partition':
          command   => "umount ${mon_mountpoint}",
          onlyif    => "mount | grep -q ${mon_mountpoint}",
          logoutput => true,
        }
        -> exec { 'Set cephmon secondary':
          command   => 'drbdadm secondary drbd-cephmon',
          unless    => "drbdadm role drbd-cephmon | egrep '^Secondary'",
          logoutput => true,
        }
      }
    } else {
      if $::hostname == $mon_0_host {
        ceph::mon { $mon_0_host:
@ -295,8 +297,7 @@ class platform::ceph::monitor
  }
 }
-
+define osd_crush_location(
 define platform_ceph_osd(
  $osd_id,
  $osd_uuid,
  $disk_path,
@ -311,12 +312,28 @@ define platform_ceph_osd(
      "osd.${$osd_id}/crush_location": value => "root=${tier_name}-tier host=${$::platform::params::hostname}-${$tier_name}";
    }
  }
-  file { "/var/lib/ceph/osd/ceph-${osd_id}":
+}
 define platform_ceph_osd(
  $osd_id,
  $osd_uuid,
  $disk_path,
  $data_path,
  $journal_path,
  $tier_name,
 ) {
  Anchor['platform::networking']  # Make sure networking is up before running ceph commands
  -> file { "/var/lib/ceph/osd/ceph-${osd_id}":
    ensure => 'directory',
    owner  => 'root',
    group  => 'root',
    mode   => '0755',
  }
  -> exec { "ceph osd create ${osd_uuid} ${osd_id}":
    logoutput => true,
    command   => template('platform/ceph.osd.create.erb'),
  }
  -> ceph::osd { $disk_path:
    uuid => $osd_uuid,
  }
@ -351,8 +368,13 @@ class platform::ceph::osds(
    mode   => '0755',
  }
  # Ensure ceph.conf is complete before configuring OSDs
  Class['::ceph'] -> Platform_ceph_osd <| |>
  # Journal disks need to be prepared before the OSDs are configured
  Platform_ceph_journal <| |> -> Platform_ceph_osd <| |>
  # Crush locations in ceph.conf need to be set before the OSDs are configured
  Osd_crush_location <| |> -> Platform_ceph_osd <| |>
  # default configuration for all ceph object resources
  Ceph::Osd {
@ -360,6 +382,7 @@ class platform::ceph::osds(
    cluster_uuid => $cluster_uuid,
  }
  create_resources('osd_crush_location', $osd_config)
  create_resources('platform_ceph_osd', $osd_config)
  create_resources('platform_ceph_journal', $journal_config)
 }
@ -479,6 +502,7 @@ class platform::ceph::runtime_base {
 class platform::ceph::runtime_osds {
  include ::ceph::params
  include ::platform::ceph
  include ::platform::ceph::osds
  # Since this is runtime we have to avoid checking status of Ceph while we
--- a/puppet-manifests/src/modules/platform/manifests/drbd.pp
+++ b/puppet-manifests/src/modules/platform/manifests/drbd.pp
@ -445,16 +445,13 @@ class platform::drbd::cephmon ()
  $system_mode = $::platform::params::system_mode
  $system_type = $::platform::params::system_type
-  #TODO: This will change once we remove the native cinder service
+  if str2bool($::is_standalone_controller) and ! str2bool($::is_node_ceph_configured) {
  if (str2bool($::is_initial_config_primary) or
      (str2bool($::is_controller_active) and str2bool($::is_initial_cinder_ceph_config))
  ){
    # Active controller, first time configuration.
    $drbd_primary = true
    $drbd_initial = true
    $drbd_automount = true
-  } elsif str2bool($::is_standalone_controller){
+  } elsif str2bool($::is_standalone_controller) {
    # Active standalone controller, successive reboots.
    $drbd_primary = true
    $drbd_initial = undef
@ -490,9 +487,9 @@ class platform::drbd(
  $service_enable = false,
  $service_ensure = 'stopped',
 ) {
-  if (str2bool($::is_initial_config_primary)
+  if (str2bool($::is_initial_config_primary) or str2bool($::is_standalone_controller)
  ){
-    # Enable DRBD at config_controller
+    # Enable DRBD on standalone
    class { '::drbd':
      service_enable => true,
      service_ensure => 'running',
@ -553,41 +550,55 @@ class platform::drbd::runtime {
  }
 }
 # Declares the ::drbd class with its service enabled and ensured running.
 # Included by the per-resource platform::drbd::*::runtime classes.
 class platform::drbd::runtime_service_enable {
  class { '::drbd':
    service_enable => true,
    service_ensure => 'running'
  }
 }
 # Runtime manifest for the pgsql DRBD resource: includes the DRBD params,
 # the class that enables the drbd service, and platform::drbd::pgsql.
 class platform::drbd::pgsql::runtime {
  include ::platform::drbd::params
  include ::platform::drbd::runtime_service_enable
  include ::platform::drbd::pgsql
 }
 # Runtime manifest for the cgcs DRBD resource: includes the DRBD params,
 # the class that enables the drbd service, and platform::drbd::cgcs.
 class platform::drbd::cgcs::runtime {
  include ::platform::drbd::params
  include ::platform::drbd::runtime_service_enable
  include ::platform::drbd::cgcs
 }
 # Runtime manifest for the extension DRBD resource: includes the DRBD params,
 # the class that enables the drbd service, and platform::drbd::extension.
 class platform::drbd::extension::runtime {
  include ::platform::drbd::params
  include ::platform::drbd::runtime_service_enable
  include ::platform::drbd::extension
 }
 # Runtime manifest for the patch_vault DRBD resource: includes the DRBD
 # params, the class that enables the drbd service, and
 # platform::drbd::patch_vault.
 class platform::drbd::patch_vault::runtime {
  include ::platform::drbd::params
  include ::platform::drbd::runtime_service_enable
  include ::platform::drbd::patch_vault
 }
 # Runtime manifest for the etcd DRBD resource: includes the DRBD params,
 # the class that enables the drbd service, and platform::drbd::etcd.
 class platform::drbd::etcd::runtime {
  include ::platform::drbd::params
  include ::platform::drbd::runtime_service_enable
  include ::platform::drbd::etcd
 }
 # Runtime manifest for the dockerdistribution DRBD resource: includes the
 # DRBD params, the class that enables the drbd service, and
 # platform::drbd::dockerdistribution.
 class platform::drbd::dockerdistribution::runtime {
  include ::platform::drbd::params
  include ::platform::drbd::runtime_service_enable
  include ::platform::drbd::dockerdistribution
 }
 # Runtime manifest for the cephmon DRBD resource: includes the DRBD params,
 # the class that enables the drbd service, and platform::drbd::cephmon.
 class platform::drbd::cephmon::runtime {
  include ::platform::drbd::params
  include ::platform::drbd::runtime_service_enable
  include ::platform::drbd::cephmon
 }
--- a/puppet-manifests/src/modules/platform/templates/ceph.osd.create.erb
+++ b/puppet-manifests/src/modules/platform/templates/ceph.osd.create.erb
@ -0,0 +1,52 @@
 /bin/true # puppet requires this for correct template parsing
 # This is needed to pin a specific OSD id with a corresponding UUID.
 # Problem is ceph-disk prepare doesn't accept ceph OSD id as cli
 # parameter. Therefore, the OSD with desired OSD ID and UUID must
 # exist before puppet ceph module executes ceph-disk prepare.
 #
 # Exit status: 0 when Ceph is unreachable, when the OSD already exists with
 # the expected UUID, or when it was created; otherwise the exit code of the
 # failing 'ceph osd rm' / 'ceph osd create' command.
 # Trace each command as it runs (turned off again on the last line).
 set -x
 # Filled in by ERB from the template's @osd_id and @osd_uuid variables.
 osd_id=<%= @osd_id %>
 osd_uuid=<%= @osd_uuid %>
 # Ignore if Ceph is down, this case should only happen on DOR
 # The status query is bounded to 10 seconds by 'timeout'.
 timeout 10 ceph -s
 if [ $? -ne 0 ]; then
    exit 0
 fi
 # Check if OSD exists and has the correct UUID
 osds=( $(ceph osd ls) )
 # Both sides are padded with spaces so only a whole id matches
 # (e.g. id 1 does not match 10).
 if [[ " ${osds[@]} " =~ " ${osd_id} " ]]; then
    # Get UUID, this is slower than osd ls as it also lists PGs with problems
    # but is the only way to get the uuid of an OSD.
    # The UUID is read from the last field of the matching 'osd.<id>' line.
    found_uuid=$(ceph osd dump | grep "^osd.${osd_id} " | awk '{print $NF}')
    if [ "${found_uuid}" != "${osd_uuid}" ]; then
        # At B&R ceph's crushmap is restored but, although OSDs are properly
        # allocated to their hosts in the tree, crushmap does not store
        # OSD UUIDs. Therefore, w/o osd_id and uuid match, when the OSD is
        # prepared there is a chance that ceph-disk will create a new OSD
        # that will no longer match the osd id in sysinv db. So, we have
        # to remove OSDs that don't match UUIDs and recreate them with
        # expected OSD ID and UUID so that ceph-disk does not get confused.
        ceph osd rm ${osd_id}
        RET=$?
        if [ $RET -ne 0 ]; then
            echo "Error removing osd ${osd_id}, exit code: ${RET}"
            exit $RET
        fi
    else
        # OSD exists and has the correct uuid
        exit 0
    fi
 fi
 # Create the OSD with desired id and uuid
 # Reached when the OSD id was not listed, or was just removed above because
 # its UUID did not match.
 ceph osd create ${osd_uuid} ${osd_id}
 RET=$?
 if [ $RET -ne 0 ]; then
    echo "Error creating osd ${osd_id}, exit code: ${RET}"
    exit $RET
 fi
 set +x
--- a/sysinv/cgts-client/cgts-client/cgtsclient/v1/storage_backend.py
+++ b/sysinv/cgts-client/cgts-client/cgtsclient/v1/storage_backend.py
@ -11,8 +11,6 @@ from cgtsclient.common import base
 from cgtsclient.common import constants
 from cgtsclient.common import utils
 from cgtsclient import exc
 from cgtsclient.v1 import ceph_mon as ceph_mon_utils
 from cgtsclient.v1 import ihost as ihost_utils
 from cgtsclient.v1 import storage_ceph  # noqa
 from cgtsclient.v1 import storage_ceph_external  # noqa
 from cgtsclient.v1 import storage_external  # noqa
@ -138,23 +136,6 @@ def _display_next_steps():
 def backend_add(cc, backend, args):
    backend = backend.replace('-', '_')
    # add ceph mons to controllers
    if backend == constants.SB_TYPE_CEPH:
        # Controllers should always have monitors.
        # Not finding a controller means it's not yet configured,
        # so move forward.
        try:
            ihost = ihost_utils._find_ihost(cc, constants.CONTROLLER_0_HOSTNAME)
            ceph_mon_utils.ceph_mon_add(cc, args, ihost.uuid)
        except exc.CommandError:
            pass
        try:
            ihost = ihost_utils._find_ihost(cc, constants.CONTROLLER_1_HOSTNAME)
            ceph_mon_utils.ceph_mon_add(cc, args, ihost.uuid)
        except exc.CommandError:
            pass
    # allowed storage_backend fields
    allowed_fields = ['name', 'services', 'confirmed', 'ceph_conf']
--- a/sysinv/sysinv/sysinv/sysinv/agent/manager.py
+++ b/sysinv/sysinv/sysinv/sysinv/agent/manager.py
@ -512,6 +512,10 @@ class AgentManager(service.PeriodicService):
            msg_dict.update({constants.HOST_ACTION_STATE:
                             constants.HAS_REINSTALLED})
        # Is this the first time since boot we are reporting to conductor?
        msg_dict.update({constants.SYSINV_AGENT_FIRST_REPORT:
                         not os.path.exists(SYSINV_FIRST_REPORT_FLAG)})
        try:
            rpcapi.iplatform_update_by_ihost(context,
                                             host_uuid,
@ -916,10 +920,6 @@ class AgentManager(service.PeriodicService):
                if iscsi_initiator_name is not None:
                    imsg_dict.update({'iscsi_initiator_name': iscsi_initiator_name})
                # Is this the first time since boot we are reporting to conductor?
                imsg_dict.update({constants.SYSINV_AGENT_FIRST_REPORT:
                                  not os.path.exists(SYSINV_FIRST_REPORT_FLAG)})
                self.platform_update_by_host(rpcapi,
                                             icontext,
                                             self._ihost_uuid,
--- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/ceph_mon.py
+++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/ceph_mon.py
@ -446,9 +446,10 @@ def _create(ceph_mon):
                  "replication is set to: %s'. Please update replication "
                  "before configuring a monitor on a worker node." % supported_replication))
-    # host must be locked and online
+    # host must be locked and online unless this is controller-0
-    if (chost['availability'] != constants.AVAILABILITY_ONLINE or
+    if (chost['hostname'] != constants.CONTROLLER_0_HOSTNAME and
-            chost['administrative'] != constants.ADMIN_LOCKED):
+            (chost['availability'] != constants.AVAILABILITY_ONLINE or
            chost['administrative'] != constants.ADMIN_LOCKED)):
        raise wsme.exc.ClientSideError(
            _("Host %s must be locked and online." % chost['hostname']))
--- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
+++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py
@ -5295,7 +5295,7 @@ class HostController(rest.RestController):
            pass
        elif StorageBackendConfig.has_backend_configured(
                pecan.request.dbapi,
-                constants.CINDER_BACKEND_CEPH):
+                constants.SB_TYPE_CEPH):
            if utils.is_aio_simplex_system(pecan.request.dbapi):
                # Check if host has enough OSDs configured for each tier
                tiers = pecan.request.dbapi.storage_tier_get_all()
--- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage.py
+++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage.py
@ -471,23 +471,33 @@ class StorageController(rest.RestController):
        except Exception as e:
            LOG.exception(e)
            raise
        # Make sure that we are allowed to delete
        _check_host(stor)
        # Delete the stor if supported
        ihost_id = stor['forihostid']
        ihost = pecan.request.dbapi.ihost_get(ihost_id)
        if stor.function == constants.STOR_FUNCTION_JOURNAL:
            # Host must be locked
            ihost_id = stor['forihostid']
            ihost = pecan.request.dbapi.ihost_get(ihost_id)
            if ihost['administrative'] != constants.ADMIN_LOCKED:
                raise wsme.exc.ClientSideError(_("Host %s must be locked." %
                                                ihost['hostname']))
            self.delete_stor(stor_uuid)
        elif (stor.function == constants.STOR_FUNCTION_OSD and
              stor.state == constants.SB_STATE_CONFIGURING_ON_UNLOCK):
            # Host must be locked
            if ihost['administrative'] != constants.ADMIN_LOCKED:
                raise wsme.exc.ClientSideError(_("Host %s must be locked." %
                                                ihost['hostname']))
            self.delete_stor(stor_uuid)
        else:
            raise wsme.exc.ClientSideError(_(
-                   "Deleting a Storage Function other than %s is not "
+                   "Deleting a Storage Function other than '%s' and '%s' in "
-                   "supported on this setup") % constants.STOR_FUNCTION_JOURNAL)
+                   "state '%s' is not supported on this setup.") %
                        (constants.STOR_FUNCTION_JOURNAL,
                         constants.STOR_FUNCTION_OSD,
                         constants.SB_STATE_CONFIGURING_ON_UNLOCK))
    def delete_stor(self, stor_uuid):
        """Delete a stor"""
@ -497,10 +507,10 @@ class StorageController(rest.RestController):
        try:
            # The conductor will handle removing the stor, not all functions
            # need special handling
-            if stor.function == constants.STOR_FUNCTION_OSD:
+            # if stor.function == constants.STOR_FUNCTION_OSD:
-                pecan.request.rpcapi.unconfigure_osd_istor(pecan.request.context,
+            #     pecan.request.rpcapi.unconfigure_osd_istor(pecan.request.context,
-                                                           stor)
+            #                                                stor)
-            elif stor.function == constants.STOR_FUNCTION_JOURNAL:
+            if stor.function == constants.STOR_FUNCTION_JOURNAL:
                pecan.request.dbapi.istor_disable_journal(stor_uuid)
            # Now remove the stor from DB
            pecan.request.dbapi.istor_remove_disk_association(stor_uuid)
@ -901,29 +911,21 @@ def _create(stor, iprofile=None):
                "Invalid stor device type: only SSD and NVME devices are supported"
                " for journal functions."))
    if osd_create is True:
        # Get the next free OSD ID in the system
        stors = pecan.request.dbapi.istor_get_list(sort_key='osdid', sort_dir='asc')
        stors_ids = [s['osdid'] for s in stors if s['osdid'] is not None]
        if stors_ids:
            candidate_ids = [i for i in range(0, stors_ids[-1] + 2) if i not in stors_ids]
            create_attrs['osdid'] = candidate_ids[0]
        else:
            create_attrs['osdid'] = 0
    else:
        create_attrs['osdid'] = None
    new_stor = pecan.request.dbapi.istor_create(forihostid,
                                                create_attrs)
    # Create an osd associated with disk.
    if osd_create is True:
        try:
            new_stor = pecan.request.rpcapi.configure_osd_istor(
                pecan.request.context, new_stor)
        except Exception as cpe:
            LOG.exception(cpe)
            # Delete the partially configure istor
            pecan.request.dbapi.istor_destroy(new_stor.uuid)
            raise wsme.exc.ClientSideError(_(
                "Internal error: failed to create a storage object. "
                "Make sure storage cluster is up and healthy."))
        if iprofile:
            new_stor = pecan.request.dbapi.istor_update(new_stor.uuid,
                                                        {'osdid': None})
        else:
            # Update the database record
            new_stor.save(pecan.request.context)
    # Associate the disk to db record
    values = {'foristorid': new_stor.id}
    pecan.request.dbapi.idisk_update(idisk_uuid,
--- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_ceph.py
+++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_ceph.py
@ -339,7 +339,6 @@ class StorageCephController(rest.RestController):
    @wsme_pecan.wsexpose(None, types.uuid, status_code=204)
    def delete(self, storageceph_uuid):
        """Delete a backend.

        Thin REST wrapper: the work is delegated to the ``_delete`` helper
        and its result is returned unchanged. The endpoint is exposed with
        HTTP status code 204.

        :param storageceph_uuid: UUID of the Ceph storage backend to delete.
        """
        return _delete(storageceph_uuid)
@ -696,14 +695,8 @@ def _apply_backend_changes(op, sb_obj):
    services = api_helper.getListFromServices(sb_obj.as_dict())
    if op == constants.SB_API_OP_CREATE:
-        if sb_obj.name == constants.SB_DEFAULT_NAMES[
+        if sb_obj.name != constants.SB_DEFAULT_NAMES[
                constants.SB_TYPE_CEPH]:
            # Apply manifests for primary tier
            pecan.request.rpcapi.update_ceph_config(pecan.request.context,
                                                    sb_obj.uuid,
                                                    services)
        else:
            # Enable the service(s) use of the backend
            if constants.SB_SVC_CINDER in services:
                pecan.request.rpcapi.update_ceph_services(
@ -800,10 +793,21 @@ def _set_defaults(storage_ceph):
        'kube_pool_gib': None,
        'object_gateway': False,
    }
    sc = api_helper.set_backend_data(storage_ceph,
                                     defaults,
                                     CAPABILITIES,
                                     constants.SB_CEPH_SVCS_SUPPORTED)
    # Ceph is our default storage backend and is added at configuration
    # set state and task accordingly.
    if sc['name'] == constants.SB_DEFAULT_NAMES[constants.SB_TYPE_CEPH]:
        sc['state'] = constants.SB_STATE_CONFIGURED
        if utils.is_aio_simplex_system(pecan.request.dbapi):
            sc['task'] = None
        else:
            sc['task'] = constants.SB_TASK_RECONFIG_CONTROLLER
    return sc
--- a/sysinv/sysinv/sysinv/sysinv/common/ceph.py
+++ b/sysinv/sysinv/sysinv/sysinv/common/ceph.py
@ -24,6 +24,8 @@ import pecan
 import os
 import requests
 from sysinv.api.controllers.v1.utils import is_aio_system
 LOG = logging.getLogger(__name__)
@ -704,11 +706,36 @@ class CephApiOperator(object):
 def fix_crushmap(dbapi=None):
    """ Set Ceph's CRUSH Map based on storage model """
    def _create_crushmap_flag_file():
        try:
            open(crushmap_flag_file, "w").close()
        except IOError as e:
            LOG.warn(_('Failed to create flag file: {}. '
                       'Reason: {}').format(crushmap_flag_file, e))
    if not dbapi:
        dbapi = pecan.request.dbapi
    crushmap_flag_file = os.path.join(constants.SYSINV_CONFIG_PATH,
                                      constants.CEPH_CRUSH_MAP_APPLIED)
    if not os.path.isfile(crushmap_flag_file):
        _operator = CephApiOperator()
        if not is_aio_system(dbapi):
            # At least two monitors have to be running on a standard deployment,
            # otherwise don't even try to load the crushmap.
            active_mons, required_mons, __ = _operator.get_monitors_status(dbapi)
            if required_mons > active_mons:
                LOG.info("Not enough monitors yet available to fix crushmap.")
                return False
        # The crushmap may already have been loaded through puppet; avoid doing it twice.
        default_ceph_tier_name = constants.SB_TIER_DEFAULT_NAMES[
                constants.SB_TIER_TYPE_CEPH] + constants.CEPH_CRUSH_TIER_SUFFIX
        rule_is_present, __, __ = _operator._crush_rule_status(default_ceph_tier_name)
        if rule_is_present:
            _create_crushmap_flag_file()
            return False
        stor_model = get_ceph_storage_model(dbapi)
        if stor_model == constants.CEPH_AIO_SX_MODEL:
            crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt"
@ -731,13 +758,10 @@ def fix_crushmap(dbapi=None):
            # May not be critical, depends on where this is called.
            reason = "Error: %s Output: %s" % (str(e), e.output)
            raise exception.CephCrushMapNotApplied(reason=reason)
-        try:
+        _create_crushmap_flag_file()
            open(crushmap_flag_file, "w").close()
        except IOError as e:
            LOG.warn(_('Failed to create flag file: {}. '
                       'Reason: {}').format(crushmap_flag_file, e))
        return True
    return False
 def get_ceph_storage_model(dbapi=None):
--- a/sysinv/sysinv/sysinv/sysinv/common/storage_backend_conf.py
+++ b/sysinv/sysinv/sysinv/sysinv/common/storage_backend_conf.py
@ -220,6 +220,7 @@ class StorageBackendConfig(object):
        }
        ceph_mons = dbapi.ceph_mon_get_list()
        ceph_mon = None
        for ceph_mon in ceph_mons:
            if ceph_mon['hostname'] == constants.CONTROLLER_0_HOSTNAME:
                targets.update({'%s-%s' % (constants.CONTROLLER_0_HOSTNAME,
@ -231,8 +232,6 @@ class StorageBackendConfig(object):
                targets.update({'%s-%s' % (ceph_mon['hostname'],
                                           network_type): 'ceph-mon-2-ip'})
        ceph_mon['ceph_mon_gib'] = ceph_mons[0]['ceph_mon_gib']
        results = {}
        addrs = dbapi.addresses_get_all()
        for addr in addrs:
--- a/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py
+++ b/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py
@ -124,7 +124,6 @@ class CephOperator(object):
        # cluster UUID value that is valid and consistent for the state of the
        # installation. Also make sure that we have a cluster DB entry
        # established
        LOG.debug("_init_db_cluster_and_tier: Reteiving cluster record")
        try:
            self._db_cluster = self._db_api.clusters_get_all(
                type=constants.CINDER_BACKEND_CEPH)[0]
@ -132,7 +131,7 @@ class CephOperator(object):
                # Retrieve ceph cluster fsid and update database
                fsid = self._get_fsid()
                if uuidutils.is_uuid_like(fsid):
-                    LOG.debug("Update cluster record: fsid=%s." % fsid)
+                    LOG.info("Update cluster record: fsid=%s." % fsid)
                    self._db_cluster.cluster_uuid = fsid
                    self._db_api.cluster_update(
                        self.cluster_db_uuid,
@ -155,7 +154,7 @@ class CephOperator(object):
        # Try to use ceph cluster fsid
        fsid = self._get_fsid()
-        LOG.info("Create new cluster record: fsid=%s." % fsid)
+        LOG.info("Create new ceph cluster record: fsid=%s." % fsid)
        # Create the default primary cluster
        self._db_cluster = self._db_api.cluster_create(
            {'uuid': fsid if uuidutils.is_uuid_like(fsid) else str(uuid.uuid4()),
@ -165,6 +164,7 @@ class CephOperator(object):
             'system_id': isystem.id})
        # Create the default primary ceph storage tier
        LOG.info("Create primary ceph tier record.")
        self._db_primary_tier = self._db_api.storage_tier_create(
            {'forclusterid': self.cluster_id,
             'name': constants.SB_TIER_DEFAULT_NAMES[constants.SB_TIER_TYPE_CEPH],
@ -831,67 +831,6 @@ class CephOperator(object):
                    name=rule_name, reason=body['status'])
                raise e
    # TODO(CephPoolsDecouple): remove
    def configure_osd_pools(self, ceph_backend=None, new_pool_size=None, new_pool_min_size=None):
        """Create or resize all of the osd pools as needed.

        Walks every ceph storage tier of the cluster and (re)configures its
        pools using the replication parameters of the backend attached to
        that tier.

        :param ceph_backend: backend whose replication values are overridden
                             by new_pool_size / new_pool_min_size. It could be
                             a 2nd backend which is in configuring state.
        :param new_pool_size: replacement pool size for ceph_backend's tier;
                              ignored when falsy.
        :param new_pool_min_size: replacement pool min size for
                                  ceph_backend's tier; ignored when falsy.
        :raises exception.CephFailure: when configuring one of the primary
                                       tier pools listed in CEPH_POOLS fails.
        """
        # Handle pools for multiple tiers
        tiers = self._db_api.storage_tier_get_by_cluster(self.cluster_db_uuid)
        ceph_tiers = [t for t in tiers if t.type == constants.SB_TIER_TYPE_CEPH]
        ceph_backends = self._db_api.storage_ceph_get_list()

        for t in ceph_tiers:
            # Get corresponding ceph backend for the tier, if any. A plain
            # "for ... break" loop would leave the last backend bound when
            # nothing matches, so an unrelated backend's replication (and
            # possibly the override below) would be applied to this tier.
            bk = next((b for b in ceph_backends if t.forbackendid == b.id),
                      None)

            # Get pool replication parameters
            # NOTE(review): bk may be None here; presumably
            # get_ceph_pool_replication handles that - confirm.
            pool_size, pool_min_size = StorageBackendConfig.get_ceph_pool_replication(self._db_api, bk)
            if bk and ceph_backend and bk.name == ceph_backend.name:
                # Override replication
                pool_size = new_pool_size if new_pool_size else pool_size
                pool_min_size = new_pool_min_size if new_pool_min_size else pool_min_size

            # Configure tier OSD pools
            if t.uuid != self.primary_tier_uuid:
                # Secondary tier: failures are logged and do not abort the
                # processing of the remaining tiers.
                try:
                    self._configure_secondary_tier_pools(t, pool_size,
                                                         pool_min_size)
                except (exception.CephPoolRulesetFailure,
                        exception.CephFailure) as e:
                    LOG.info("Cannot add pools: %s" % e)
                continue

            # This is primary tier
            # In case we're updating pool_size to a different value than
            # default. Just update pool size for ceph's default pool 'rbd'
            # as well
            try:
                self._configure_primary_tier_pool(
                    {'pool_name': constants.CEPH_POOL_RBD_NAME,
                     'pg_num': constants.CEPH_POOL_RBD_PG_NUM,
                     'pgp_num': constants.CEPH_POOL_RBD_PGP_NUM},
                    pool_size,
                    pool_min_size)
            except exception.CephFailure:
                # Best effort: a failure on the default 'rbd' pool is
                # deliberately ignored.
                pass

            # Handle primary tier pools (cinder/glance/swift/ephemeral)
            for pool in CEPH_POOLS:
                # TODO(rchurch): The following is added for R3->R4 upgrades. Can we
                # remove this for R5? Or is there some R3->R4->R5 need to keep this
                # around.
                try:
                    self.update_ceph_object_pool_name(pool)
                except exception.CephFailure:
                    pass

                self._configure_primary_tier_pool(pool, pool_size,
                                                  pool_min_size)
    def _update_db_capabilities(self, bk, new_storceph):
        # Avoid updating DB for all capabilities in new_storceph as we
        # don't manage them. Leave the callers deal with it.
--- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py
+++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py
@ -283,15 +283,6 @@ class ConductorManager(service.PeriodicService):
        self.dbapi.remotelogging_create(system_id_attribute_value)
        self.dbapi.ptp_create(system_id_attribute_value)
        # set default storage_backend
        values.update({'backend': constants.SB_TYPE_FILE,
                       'name': constants.SB_DEFAULT_NAMES[constants.SB_TYPE_FILE],
                       'state': constants.SB_STATE_CONFIGURED,
                       'task': constants.SB_TASK_NONE,
                       'services': None,
                       'capabilities': {}})
        self.dbapi.storage_backend_create(values)
        # populate service table
        for optional_service in constants.ALL_OPTIONAL_SERVICES:
            self.dbapi.service_create({'name': optional_service,
@ -4378,8 +4369,9 @@ class ConductorManager(service.PeriodicService):
        if availability == constants.AVAILABILITY_AVAILABLE:
            if imsg_dict.get(constants.SYSINV_AGENT_FIRST_REPORT):
-                # This should be run once after a boot
+                # This should be run once after a node boot
                self._clear_ceph_stor_state(ihost_uuid)
                cceph.fix_crushmap(self.dbapi)
            config_uuid = imsg_dict['config_applied']
            self._update_host_config_applied(context, ihost, config_uuid)
@ -5215,39 +5207,6 @@ class ConductorManager(service.PeriodicService):
        # Not sure yet what the proper response is here
        pass
    def configure_osd_istor(self, context, istor_obj):
        """Synchronously have the conductor configure an OSD istor.

        Steps performed:
        - Rejects the call when the istor already carries an osdid.
        - Asks ceph to create an OSD for the istor uuid.
        - Stores the returned osdid in the istor object.
        - On non-kubernetes configurations, creates or resizes the OSD
          pools as necessary.

        :param context: request context.
        :param istor_obj: an istor object.
        :returns: istor object, with updated osdid
        :raises exception.SysinvException: when an osdid is already assigned.
        """
        # NOTE(review): this is a truthiness test, so an osdid of 0 is
        # treated as "not assigned" - confirm that is intended.
        current_osdid = istor_obj['osdid']
        if current_osdid:
            LOG.error("OSD already assigned: %s", str(current_osdid))
            raise exception.SysinvException(_(
                "Invalid method call: osdid already assigned: %s") %
                    str(current_osdid))

        # Create the OSD
        osd_response, osd_body = self._ceph.osd_create(istor_obj['uuid'],
                                                       body='json')
        if not osd_response.ok:
            LOG.error("OSD create failed: %s", osd_response.reason)
            osd_response.raise_for_status()

        # Update the osdid in the stor object
        istor_obj['osdid'] = osd_body['output']['osdid']

        # TODO(CephPoolsDecouple): remove
        if utils.is_kubernetes_config(self.dbapi):
            return istor_obj

        self._ceph.configure_osd_pools()
        return istor_obj
    def restore_ceph_config(self, context, after_storage_enabled=False):
        """Restore Ceph configuration during Backup and Restore process.
--- a/sysinv/sysinv/sysinv/sysinv/puppet/ceph.py
+++ b/sysinv/sysinv/sysinv/sysinv/puppet/ceph.py
@ -47,6 +47,9 @@ class CephPuppet(openstack.OpenstackBasePuppet):
        ceph_mon_ips = StorageBackendConfig.get_ceph_mon_ip_addresses(
            self.dbapi)
        if not ceph_mon_ips:
            return {}  # system configuration is not yet ready
        controller_hosts = [constants.CONTROLLER_0_HOSTNAME, constants.CONTROLLER_1_HOSTNAME]
        mon_2_host = [mon['hostname'] for mon in self.dbapi.ceph_mon_get_list() if
                      mon['hostname'] not in controller_hosts]