diff --git a/controllerconfig/controllerconfig/controllerconfig/configassistant.py b/controllerconfig/controllerconfig/controllerconfig/configassistant.py index 5185abfc27..82a02115ca 100644 --- a/controllerconfig/controllerconfig/controllerconfig/configassistant.py +++ b/controllerconfig/controllerconfig/controllerconfig/configassistant.py @@ -5077,6 +5077,15 @@ class ConfigAssistant(): } client.sysinv.route.create(**values) + def _populate_default_storage_backend(self, client, controller): + # Create the Ceph monitor for controller-0 + values = {'ihost_uuid': controller.uuid} + client.sysinv.ceph_mon.create(**values) + + # Create the Ceph default backend + values = {'confirmed': True} + client.sysinv.storage_ceph.create(**values) + def _populate_infrastructure_interface(self, client, controller): """Configure the infrastructure interface(s)""" if not self.infrastructure_interface: @@ -5349,6 +5358,7 @@ class ConfigAssistant(): # ceph_mon config requires controller host to be created self._inventory_config_complete_wait(client, controller) self._populate_interface_config(client, controller) + self._populate_default_storage_backend(client, controller) except (KeystoneFail, SysInvFail) as e: LOG.exception(e) diff --git a/playbookconfig/playbookconfig/playbooks/bootstrap/roles/persist-config/files/populate_initial_config.py b/playbookconfig/playbookconfig/playbooks/bootstrap/roles/persist-config/files/populate_initial_config.py index 60360e722c..3f55c094fa 100644 --- a/playbookconfig/playbookconfig/playbooks/bootstrap/roles/persist-config/files/populate_initial_config.py +++ b/playbookconfig/playbookconfig/playbooks/bootstrap/roles/persist-config/files/populate_initial_config.py @@ -19,8 +19,8 @@ import time # constants in controllerconfig. When it is time to remove/deprecate these # packages, classes OpenStack, Token and referenced constants need to be moved # to this standalone script. 
-from controllerconfig import ConfigFail from controllerconfig.common import constants +from controllerconfig import ConfigFail from controllerconfig import openstack from controllerconfig import sysinv_api as sysinv @@ -711,6 +711,19 @@ def inventory_config_complete_wait(client, controller): wait_pv_config(client, controller) +def populate_default_storage_backend(client, controller): + if not INITIAL_POPULATION: + return + + print("Populating ceph-mon config for controller-0...") + values = {'ihost_uuid': controller.uuid} + client.sysinv.ceph_mon.create(**values) + + print("Populating ceph storage backend config...") + values = {'confirmed': True} + client.sysinv.storage_ceph.create(**values) + + def handle_invalid_input(): raise Exception("Invalid input!\nUsage: " "[--system] [--network] [--service]") @@ -757,6 +770,7 @@ if __name__ == '__main__': populate_docker_config(client) controller = populate_controller_config(client) inventory_config_complete_wait(client, controller) + populate_default_storage_backend(client, controller) os.remove(config_file) if INITIAL_POPULATION: print("Successfully updated the initial system config.") diff --git a/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_ceph_config.rb b/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_ceph_config.rb deleted file mode 100644 index f9f05cf4c2..0000000000 --- a/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_ceph_config.rb +++ /dev/null @@ -1,8 +0,0 @@ -# Returns true if cinder ceph needs to be configured - -Facter.add("is_initial_cinder_ceph_config") do - setcode do - conf_path = Facter::Core::Execution.exec("hiera --config /etc/puppet/hiera.yaml platform::params::config_path") - ! File.exist?(conf_path +'.initial_cinder_ceph_config_complete') - end -end diff --git a/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_config.rb b/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_config.rb deleted file mode 100644 index fe85d37d89..0000000000 --- a/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_config.rb +++ /dev/null @@ -1,8 +0,0 @@ -# Returns true is this is the initial cinder config for this system - -Facter.add("is_initial_cinder_config") do - setcode do - conf_path = Facter::Core::Execution.exec("hiera --config /etc/puppet/hiera.yaml platform::params::config_path") - ! File.exist?(conf_path + '.initial_cinder_config_complete') - end -end diff --git a/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_lvm_config.rb b/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_lvm_config.rb deleted file mode 100644 index 2f99b414fb..0000000000 --- a/puppet-manifests/src/modules/platform/lib/facter/is_initial_cinder_lvm_config.rb +++ /dev/null @@ -1,8 +0,0 @@ -# Returns true if cinder lvm needs to be configured - -Facter.add("is_initial_cinder_lvm_config") do - setcode do - conf_path = Facter::Core::Execution.exec("hiera --config /etc/puppet/hiera.yaml platform::params::config_path") - ! 
File.exist?(conf_path + '.initial_cinder_lvm_config_complete') - end -end diff --git a/puppet-manifests/src/modules/platform/lib/facter/is_node_cinder_lvm_config.rb b/puppet-manifests/src/modules/platform/lib/facter/is_node_cinder_lvm_config.rb deleted file mode 100644 index af6cba6ffd..0000000000 --- a/puppet-manifests/src/modules/platform/lib/facter/is_node_cinder_lvm_config.rb +++ /dev/null @@ -1,7 +0,0 @@ -# Returns true if cinder LVM needs to be configured on current node - -Facter.add("is_node_cinder_lvm_config") do - setcode do - ! File.exist?('/etc/platform/.node_cinder_lvm_config_complete') - end -end diff --git a/puppet-manifests/src/modules/platform/manifests/ceph.pp b/puppet-manifests/src/modules/platform/manifests/ceph.pp index b40c3c8839..a637b3ea0c 100644 --- a/puppet-manifests/src/modules/platform/manifests/ceph.pp +++ b/puppet-manifests/src/modules/platform/manifests/ceph.pp @@ -153,11 +153,11 @@ class platform::ceph::post } if $service_enabled { + # Ceph configuration on this node is done file { $node_ceph_configured_flag: ensure => present } } - } @@ -169,8 +169,8 @@ class platform::ceph::monitor if $service_enabled { if $system_type == 'All-in-one' and 'duplex' in $system_mode { - if str2bool($::is_controller_active) { - # Ceph mon is configured on a DRBD partition, on the active controller, + if str2bool($::is_standalone_controller) { + # Ceph mon is configured on a DRBD partition, # when 'ceph' storage backend is added in sysinv. # Then SM takes care of starting ceph after manifests are applied. $configure_ceph_mon = true @@ -236,6 +236,31 @@ class platform::ceph::monitor # ensure configuration is complete before creating monitors Class['::ceph'] -> Ceph::Mon <| |> + # ensure we load the crushmap at first unlock + if $system_type == 'All-in-one' and str2bool($::is_standalone_controller) { + if 'duplex' in $system_mode { + $crushmap_txt = '/etc/sysinv/crushmap-controller-model.txt' + } else { + $crushmap_txt = '/etc/sysinv/crushmap-aio-sx.txt' + } + $crushmap_bin = '/etc/sysinv/crushmap.bin' + Ceph::Mon <| |> + -> exec { 'Compile crushmap': + command => "crushtool -c ${crushmap_txt} -o ${crushmap_bin}", + onlyif => "test ! -f ${crushmap_bin}", + logoutput => true, + } + -> exec { 'Set crushmap': + command => "ceph osd setcrushmap -i ${crushmap_bin}", + unless => 'ceph osd crush rule list --format plain | grep -e "storage_tier_ruleset"', + logoutput => true, + } + -> Platform_ceph_osd <| |> + } + + # Ensure networking is up before Monitors are configured + Anchor['platform::networking'] -> Ceph::Mon <| |> + # default configuration for all ceph monitor resources Ceph::Mon { fsid => $cluster_uuid, @@ -248,33 +273,10 @@ class platform::ceph::monitor public_addr => $floating_mon_ip, } - if (str2bool($::is_controller_active) and - str2bool($::is_initial_cinder_ceph_config) and - !str2bool($::is_standalone_controller)) { + # On AIO-DX there is a single, floating, Ceph monitor backed by DRBD. + # Therefore DRBD must be up before Ceph monitor is configured + Drbd::Resource <| |> -> Ceph::Mon <| |> - - # When we configure ceph after both controllers are active, - # we need to stop the monitor, unmount the monitor partition - # and set the drbd role to secondary, so that the handoff to - # SM is done properly once we swact to the standby controller. - # TODO: Remove this once SM supports in-service config reload. 
- Ceph::Mon <| |> - -> exec { 'Stop Ceph monitor': - command =>'/etc/init.d/ceph stop mon', - onlyif => '/etc/init.d/ceph status mon', - logoutput => true, - } - -> exec { 'umount ceph-mon partition': - command => "umount ${mon_mountpoint}", - onlyif => "mount | grep -q ${mon_mountpoint}", - logoutput => true, - } - -> exec { 'Set cephmon secondary': - command => 'drbdadm secondary drbd-cephmon', - unless => "drbdadm role drbd-cephmon | egrep '^Secondary'", - logoutput => true, - } - } } else { if $::hostname == $mon_0_host { ceph::mon { $mon_0_host: @@ -295,8 +297,7 @@ class platform::ceph::monitor } } - -define platform_ceph_osd( +define osd_crush_location( $osd_id, $osd_uuid, $disk_path, @@ -311,12 +312,28 @@ define platform_ceph_osd( "osd.${$osd_id}/crush_location": value => "root=${tier_name}-tier host=${$::platform::params::hostname}-${$tier_name}"; } } - file { "/var/lib/ceph/osd/ceph-${osd_id}": +} + +define platform_ceph_osd( + $osd_id, + $osd_uuid, + $disk_path, + $data_path, + $journal_path, + $tier_name, +) { + + Anchor['platform::networking'] # Make sure networking is up before running ceph commands + -> file { "/var/lib/ceph/osd/ceph-${osd_id}": ensure => 'directory', owner => 'root', group => 'root', mode => '0755', } + -> exec { "ceph osd create ${osd_uuid} ${osd_id}": + logoutput => true, + command => template('platform/ceph.osd.create.erb'), + } -> ceph::osd { $disk_path: uuid => $osd_uuid, } @@ -351,8 +368,13 @@ class platform::ceph::osds( mode => '0755', } + # Ensure ceph.conf is complete before configuring OSDs + Class['::ceph'] -> Platform_ceph_osd <| |> + # Journal disks need to be prepared before the OSDs are configured Platform_ceph_journal <| |> -> Platform_ceph_osd <| |> + # Crush locations in ceph.conf need to be set before the OSDs are configured + Osd_crush_location <| |> -> Platform_ceph_osd <| |> # default configuration for all ceph object resources Ceph::Osd { @@ -360,6 +382,7 @@ class platform::ceph::osds( cluster_uuid => $cluster_uuid, } + create_resources('osd_crush_location', $osd_config) create_resources('platform_ceph_osd', $osd_config) create_resources('platform_ceph_journal', $journal_config) } @@ -479,6 +502,7 @@ class platform::ceph::runtime_base { class platform::ceph::runtime_osds { include ::ceph::params + include ::platform::ceph include ::platform::ceph::osds # Since this is runtime we have to avoid checking status of Ceph while we diff --git a/puppet-manifests/src/modules/platform/manifests/drbd.pp b/puppet-manifests/src/modules/platform/manifests/drbd.pp index af463f5b17..b8b277e247 100644 --- a/puppet-manifests/src/modules/platform/manifests/drbd.pp +++ b/puppet-manifests/src/modules/platform/manifests/drbd.pp @@ -445,16 +445,13 @@ class platform::drbd::cephmon () $system_mode = $::platform::params::system_mode $system_type = $::platform::params::system_type - #TODO: This will change once we remove the native cinder service - if (str2bool($::is_initial_config_primary) or - (str2bool($::is_controller_active) and str2bool($::is_initial_cinder_ceph_config)) - ){ + if str2bool($::is_standalone_controller) and ! str2bool($::is_node_ceph_configured) { # Active controller, first time configuration. $drbd_primary = true $drbd_initial = true $drbd_automount = true - } elsif str2bool($::is_standalone_controller){ + } elsif str2bool($::is_standalone_controller) { # Active standalone controller, successive reboots. 
$drbd_primary = true $drbd_initial = undef @@ -490,9 +487,9 @@ class platform::drbd( $service_enable = false, $service_ensure = 'stopped', ) { - if (str2bool($::is_initial_config_primary) + if (str2bool($::is_initial_config_primary) or str2bool($::is_standalone_controller) ){ - # Enable DRBD at config_controller + # Enable DRBD on standalone class { '::drbd': service_enable => true, service_ensure => 'running', @@ -553,41 +550,55 @@ class platform::drbd::runtime { } } +class platform::drbd::runtime_service_enable { + + class { '::drbd': + service_enable => true, + service_ensure => 'running' + } +} class platform::drbd::pgsql::runtime { include ::platform::drbd::params + include ::platform::drbd::runtime_service_enable include ::platform::drbd::pgsql } class platform::drbd::cgcs::runtime { include ::platform::drbd::params + include ::platform::drbd::runtime_service_enable include ::platform::drbd::cgcs } class platform::drbd::extension::runtime { include ::platform::drbd::params + include ::platform::drbd::runtime_service_enable include ::platform::drbd::extension } class platform::drbd::patch_vault::runtime { include ::platform::drbd::params + include ::platform::drbd::runtime_service_enable include ::platform::drbd::patch_vault } class platform::drbd::etcd::runtime { include ::platform::drbd::params + include ::platform::drbd::runtime_service_enable include ::platform::drbd::etcd } class platform::drbd::dockerdistribution::runtime { include ::platform::drbd::params + include ::platform::drbd::runtime_service_enable include ::platform::drbd::dockerdistribution } class platform::drbd::cephmon::runtime { include ::platform::drbd::params + include ::platform::drbd::runtime_service_enable include ::platform::drbd::cephmon } diff --git a/puppet-manifests/src/modules/platform/templates/ceph.osd.create.erb b/puppet-manifests/src/modules/platform/templates/ceph.osd.create.erb new file mode 100644 index 0000000000..64dd21df84 --- /dev/null +++ b/puppet-manifests/src/modules/platform/templates/ceph.osd.create.erb @@ -0,0 +1,52 @@ +/bin/true # puppet requires this for correct template parsing + +# This is needed to pin a specific OSD id with a corresponding UUID. +# Problem is ceph-disk prepare doesn't accept ceph OSD id as cli +# parameter. Therefore, the OSD with desired OSD ID and UUID must +# exist before puppet ceph module executes ceph-disk prepare. + +set -x + +osd_id=<%= @osd_id %> +osd_uuid=<%= @osd_uuid %> + +# Ignore if Ceph is down, this case should only happen on DOR +timeout 10 ceph -s +if [ $? -ne 0 ]; then + exit 0 +fi + +# Check if OSD exists and has the correct UUID +osds=( $(ceph osd ls) ) +if [[ " ${osds[@]} " =~ " ${osd_id} " ]]; then + # Get UUID, this is slower than osd ls as it also lists PGs with problems + # but is the only way to get the uuid of an OSD. + found_uuid=$(ceph osd dump | grep "^osd.${osd_id} " | awk '{print $NF}') + if [ "${found_uuid}" != "${osd_uuid}" ]; then + # At B&R ceph's crushmap is restored but, although OSDs are properly + # allocated to their hosts in the tree, crushmap does not store + # OSD UUIDs. Therefore, w/o osd_id and uuid match, when the OSD is + # prepared there is a chance that ceph-disk will create a new OSD + # that will no longer match the osd id in sysinv db. So, we have + # to remove OSDs that don't match UUIDs and recreate them with + # expected OSD ID and UUID so that ceph-disk does not get confused. + ceph osd rm ${osd_id} + RET=$? 
+ if [ $RET -ne 0 ]; then + echo "Error removing osd ${osd_id}, exit code: ${RET}" + exit $RET + fi + else + # OSD exists and has the correct uuid + exit 0 + fi +fi + +# Create the OSD with desired id and uuid +ceph osd create ${osd_uuid} ${osd_id} +RET=$? +if [ $RET -ne 0 ]; then + echo "Error creating osd ${osd_id}, exit code: ${RET}" + exit $RET +fi +set +x diff --git a/sysinv/cgts-client/cgts-client/cgtsclient/v1/storage_backend.py b/sysinv/cgts-client/cgts-client/cgtsclient/v1/storage_backend.py index f8e4d57624..2f1a311af9 100644 --- a/sysinv/cgts-client/cgts-client/cgtsclient/v1/storage_backend.py +++ b/sysinv/cgts-client/cgts-client/cgtsclient/v1/storage_backend.py @@ -11,8 +11,6 @@ from cgtsclient.common import base from cgtsclient.common import constants from cgtsclient.common import utils from cgtsclient import exc -from cgtsclient.v1 import ceph_mon as ceph_mon_utils -from cgtsclient.v1 import ihost as ihost_utils from cgtsclient.v1 import storage_ceph # noqa from cgtsclient.v1 import storage_ceph_external # noqa from cgtsclient.v1 import storage_external # noqa @@ -138,23 +136,6 @@ def _display_next_steps(): def backend_add(cc, backend, args): backend = backend.replace('-', '_') - # add ceph mons to controllers - if backend == constants.SB_TYPE_CEPH: - # Controllers should always have monitors. - # Not finding a controller means it's not yet configured, - # so move forward. - try: - ihost = ihost_utils._find_ihost(cc, constants.CONTROLLER_0_HOSTNAME) - ceph_mon_utils.ceph_mon_add(cc, args, ihost.uuid) - except exc.CommandError: - pass - - try: - ihost = ihost_utils._find_ihost(cc, constants.CONTROLLER_1_HOSTNAME) - ceph_mon_utils.ceph_mon_add(cc, args, ihost.uuid) - except exc.CommandError: - pass - # allowed storage_backend fields allowed_fields = ['name', 'services', 'confirmed', 'ceph_conf'] diff --git a/sysinv/sysinv/sysinv/sysinv/agent/manager.py b/sysinv/sysinv/sysinv/sysinv/agent/manager.py index 6abb4dba93..b6fdad2f0f 100644 --- a/sysinv/sysinv/sysinv/sysinv/agent/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/agent/manager.py @@ -512,6 +512,10 @@ class AgentManager(service.PeriodicService): msg_dict.update({constants.HOST_ACTION_STATE: constants.HAS_REINSTALLED}) + # Is this the first time since boot we are reporting to conductor? + msg_dict.update({constants.SYSINV_AGENT_FIRST_REPORT: + not os.path.exists(SYSINV_FIRST_REPORT_FLAG)}) + try: rpcapi.iplatform_update_by_ihost(context, host_uuid, @@ -916,10 +920,6 @@ class AgentManager(service.PeriodicService): if iscsi_initiator_name is not None: imsg_dict.update({'iscsi_initiator_name': iscsi_initiator_name}) - # Is this the first time since boot we are reporting to conductor? - imsg_dict.update({constants.SYSINV_AGENT_FIRST_REPORT: - not os.path.exists(SYSINV_FIRST_REPORT_FLAG)}) - self.platform_update_by_host(rpcapi, icontext, self._ihost_uuid, diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/ceph_mon.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/ceph_mon.py index 9474a89267..f8e9822c72 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/ceph_mon.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/ceph_mon.py @@ -446,9 +446,10 @@ def _create(ceph_mon): "replication is set to: %s'. Please update replication " "before configuring a monitor on a worker node." 
% supported_replication)) - # host must be locked and online - if (chost['availability'] != constants.AVAILABILITY_ONLINE or - chost['administrative'] != constants.ADMIN_LOCKED): + # host must be locked and online unless this is controller-0 + if (chost['hostname'] != constants.CONTROLLER_0_HOSTNAME and + (chost['availability'] != constants.AVAILABILITY_ONLINE or + chost['administrative'] != constants.ADMIN_LOCKED)): raise wsme.exc.ClientSideError( _("Host %s must be locked and online." % chost['hostname'])) diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py index b967668a0d..f449b5894b 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/host.py @@ -5295,7 +5295,7 @@ class HostController(rest.RestController): pass elif StorageBackendConfig.has_backend_configured( pecan.request.dbapi, - constants.CINDER_BACKEND_CEPH): + constants.SB_TYPE_CEPH): if utils.is_aio_simplex_system(pecan.request.dbapi): # Check if host has enough OSDs configured for each tier tiers = pecan.request.dbapi.storage_tier_get_all() diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage.py index 6a3187e993..96d29adbab 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage.py @@ -471,23 +471,33 @@ class StorageController(rest.RestController): except Exception as e: LOG.exception(e) raise - # Make sure that we are allowed to delete _check_host(stor) # Delete the stor if supported + ihost_id = stor['forihostid'] + ihost = pecan.request.dbapi.ihost_get(ihost_id) if stor.function == constants.STOR_FUNCTION_JOURNAL: # Host must be locked - ihost_id = stor['forihostid'] - ihost = pecan.request.dbapi.ihost_get(ihost_id) if ihost['administrative'] != constants.ADMIN_LOCKED: raise wsme.exc.ClientSideError(_("Host %s must be locked." % ihost['hostname'])) self.delete_stor(stor_uuid) + elif (stor.function == constants.STOR_FUNCTION_OSD and + stor.state == constants.SB_STATE_CONFIGURING_ON_UNLOCK): + # Host must be locked + if ihost['administrative'] != constants.ADMIN_LOCKED: + raise wsme.exc.ClientSideError(_("Host %s must be locked." 
% + ihost['hostname'])) + + self.delete_stor(stor_uuid) else: raise wsme.exc.ClientSideError(_( - "Deleting a Storage Function other than %s is not " - "supported on this setup") % constants.STOR_FUNCTION_JOURNAL) + "Deleting a Storage Function other than '%s' and '%s' in " + "state '%s' is not supported on this setup.") % + (constants.STOR_FUNCTION_JOURNAL, + constants.STOR_FUNCTION_OSD, + constants.SB_STATE_CONFIGURING_ON_UNLOCK)) def delete_stor(self, stor_uuid): """Delete a stor""" @@ -497,10 +507,10 @@ class StorageController(rest.RestController): try: # The conductor will handle removing the stor, not all functions # need special handling - if stor.function == constants.STOR_FUNCTION_OSD: - pecan.request.rpcapi.unconfigure_osd_istor(pecan.request.context, - stor) - elif stor.function == constants.STOR_FUNCTION_JOURNAL: + # if stor.function == constants.STOR_FUNCTION_OSD: + # pecan.request.rpcapi.unconfigure_osd_istor(pecan.request.context, + # stor) + if stor.function == constants.STOR_FUNCTION_JOURNAL: pecan.request.dbapi.istor_disable_journal(stor_uuid) # Now remove the stor from DB pecan.request.dbapi.istor_remove_disk_association(stor_uuid) @@ -901,29 +911,21 @@ def _create(stor, iprofile=None): "Invalid stor device type: only SSD and NVME devices are supported" " for journal functions.")) + if osd_create is True: + # Get the next free OSD ID in the system + stors = pecan.request.dbapi.istor_get_list(sort_key='osdid', sort_dir='asc') + stors_ids = [s['osdid'] for s in stors if s['osdid'] is not None] + if stors_ids: + candidate_ids = [i for i in range(0, stors_ids[-1] + 2) if i not in stors_ids] + create_attrs['osdid'] = candidate_ids[0] + else: + create_attrs['osdid'] = 0 + else: + create_attrs['osdid'] = None + new_stor = pecan.request.dbapi.istor_create(forihostid, create_attrs) - # Create an osd associated with disk. - if osd_create is True: - try: - new_stor = pecan.request.rpcapi.configure_osd_istor( - pecan.request.context, new_stor) - except Exception as cpe: - LOG.exception(cpe) - # Delete the partially configure istor - pecan.request.dbapi.istor_destroy(new_stor.uuid) - raise wsme.exc.ClientSideError(_( - "Internal error: failed to create a storage object. 
" - "Make sure storage cluster is up and healthy.")) - - if iprofile: - new_stor = pecan.request.dbapi.istor_update(new_stor.uuid, - {'osdid': None}) - else: - # Update the database record - new_stor.save(pecan.request.context) - # Associate the disk to db record values = {'foristorid': new_stor.id} pecan.request.dbapi.idisk_update(idisk_uuid, diff --git a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_ceph.py b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_ceph.py index d975d399fa..53f90adc1e 100644 --- a/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_ceph.py +++ b/sysinv/sysinv/sysinv/sysinv/api/controllers/v1/storage_ceph.py @@ -339,7 +339,6 @@ class StorageCephController(rest.RestController): @wsme_pecan.wsexpose(None, types.uuid, status_code=204) def delete(self, storageceph_uuid): """Delete a backend.""" - return _delete(storageceph_uuid) @@ -696,14 +695,8 @@ def _apply_backend_changes(op, sb_obj): services = api_helper.getListFromServices(sb_obj.as_dict()) if op == constants.SB_API_OP_CREATE: - if sb_obj.name == constants.SB_DEFAULT_NAMES[ + if sb_obj.name != constants.SB_DEFAULT_NAMES[ constants.SB_TYPE_CEPH]: - # Apply manifests for primary tier - pecan.request.rpcapi.update_ceph_config(pecan.request.context, - sb_obj.uuid, - services) - - else: # Enable the service(s) use of the backend if constants.SB_SVC_CINDER in services: pecan.request.rpcapi.update_ceph_services( @@ -800,10 +793,21 @@ def _set_defaults(storage_ceph): 'kube_pool_gib': None, 'object_gateway': False, } + sc = api_helper.set_backend_data(storage_ceph, defaults, CAPABILITIES, constants.SB_CEPH_SVCS_SUPPORTED) + + # Ceph is our default storage backend and is added at configuration + # set state and task accordingly. + if sc['name'] == constants.SB_DEFAULT_NAMES[constants.SB_TYPE_CEPH]: + sc['state'] = constants.SB_STATE_CONFIGURED + if utils.is_aio_simplex_system(pecan.request.dbapi): + sc['task'] = None + else: + sc['task'] = constants.SB_TASK_RECONFIG_CONTROLLER + return sc diff --git a/sysinv/sysinv/sysinv/sysinv/common/ceph.py b/sysinv/sysinv/sysinv/sysinv/common/ceph.py index a2e5a7cf1e..0ace3e569f 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/ceph.py +++ b/sysinv/sysinv/sysinv/sysinv/common/ceph.py @@ -24,6 +24,8 @@ import pecan import os import requests +from sysinv.api.controllers.v1.utils import is_aio_system + LOG = logging.getLogger(__name__) @@ -704,11 +706,36 @@ class CephApiOperator(object): def fix_crushmap(dbapi=None): """ Set Ceph's CRUSH Map based on storage model """ + def _create_crushmap_flag_file(): + try: + open(crushmap_flag_file, "w").close() + except IOError as e: + LOG.warn(_('Failed to create flag file: {}. ' + 'Reason: {}').format(crushmap_flag_file, e)) + if not dbapi: dbapi = pecan.request.dbapi crushmap_flag_file = os.path.join(constants.SYSINV_CONFIG_PATH, constants.CEPH_CRUSH_MAP_APPLIED) + if not os.path.isfile(crushmap_flag_file): + _operator = CephApiOperator() + if not is_aio_system(dbapi): + # At least two monitors have to be running on a standard deployment, + # otherwise don't even try to load the crushmap. + active_mons, required_mons, __ = _operator.get_monitors_status(dbapi) + if required_mons > active_mons: + LOG.info("Not enough monitors yet available to fix crushmap.") + return False + + # Crushmap may be already loaded thorough puppet, avoid doing it twice. 
+ default_ceph_tier_name = constants.SB_TIER_DEFAULT_NAMES[ + constants.SB_TIER_TYPE_CEPH] + constants.CEPH_CRUSH_TIER_SUFFIX + rule_is_present, __, __ = _operator._crush_rule_status(default_ceph_tier_name) + if rule_is_present: + _create_crushmap_flag_file() + return False + stor_model = get_ceph_storage_model(dbapi) if stor_model == constants.CEPH_AIO_SX_MODEL: crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt" @@ -731,13 +758,10 @@ def fix_crushmap(dbapi=None): # May not be critical, depends on where this is called. reason = "Error: %s Output: %s" % (str(e), e.output) raise exception.CephCrushMapNotApplied(reason=reason) - try: - open(crushmap_flag_file, "w").close() - except IOError as e: - LOG.warn(_('Failed to create flag file: {}. ' - 'Reason: {}').format(crushmap_flag_file, e)) + _create_crushmap_flag_file() return True + return False def get_ceph_storage_model(dbapi=None): diff --git a/sysinv/sysinv/sysinv/sysinv/common/storage_backend_conf.py b/sysinv/sysinv/sysinv/sysinv/common/storage_backend_conf.py index 0faeb0811d..2d099cf27a 100644 --- a/sysinv/sysinv/sysinv/sysinv/common/storage_backend_conf.py +++ b/sysinv/sysinv/sysinv/sysinv/common/storage_backend_conf.py @@ -220,6 +220,7 @@ class StorageBackendConfig(object): } ceph_mons = dbapi.ceph_mon_get_list() + ceph_mon = None for ceph_mon in ceph_mons: if ceph_mon['hostname'] == constants.CONTROLLER_0_HOSTNAME: targets.update({'%s-%s' % (constants.CONTROLLER_0_HOSTNAME, @@ -231,8 +232,6 @@ class StorageBackendConfig(object): targets.update({'%s-%s' % (ceph_mon['hostname'], network_type): 'ceph-mon-2-ip'}) - ceph_mon['ceph_mon_gib'] = ceph_mons[0]['ceph_mon_gib'] - results = {} addrs = dbapi.addresses_get_all() for addr in addrs: diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py b/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py index 23903244b2..de868d81eb 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/ceph.py @@ -124,7 +124,6 @@ class CephOperator(object): # cluster UUID value that is valid and consistent for the state of the # installation. Also make sure that we have a cluster DB entry # established - LOG.debug("_init_db_cluster_and_tier: Reteiving cluster record") try: self._db_cluster = self._db_api.clusters_get_all( type=constants.CINDER_BACKEND_CEPH)[0] @@ -132,7 +131,7 @@ class CephOperator(object): # Retrieve ceph cluster fsid and update database fsid = self._get_fsid() if uuidutils.is_uuid_like(fsid): - LOG.debug("Update cluster record: fsid=%s." % fsid) + LOG.info("Update cluster record: fsid=%s." % fsid) self._db_cluster.cluster_uuid = fsid self._db_api.cluster_update( self.cluster_db_uuid, @@ -155,7 +154,7 @@ class CephOperator(object): # Try to use ceph cluster fsid fsid = self._get_fsid() - LOG.info("Create new cluster record: fsid=%s." % fsid) + LOG.info("Create new ceph cluster record: fsid=%s." 
% fsid) # Create the default primary cluster self._db_cluster = self._db_api.cluster_create( {'uuid': fsid if uuidutils.is_uuid_like(fsid) else str(uuid.uuid4()), @@ -165,6 +164,7 @@ class CephOperator(object): 'system_id': isystem.id}) # Create the default primary ceph storage tier + LOG.info("Create primary ceph tier record.") self._db_primary_tier = self._db_api.storage_tier_create( {'forclusterid': self.cluster_id, 'name': constants.SB_TIER_DEFAULT_NAMES[constants.SB_TIER_TYPE_CEPH], @@ -831,67 +831,6 @@ class CephOperator(object): name=rule_name, reason=body['status']) raise e - # TODO(CephPoolsDecouple): remove - def configure_osd_pools(self, ceph_backend=None, new_pool_size=None, new_pool_min_size=None): - """Create or resize all of the osd pools as needed - ceph backend could be 2nd backend which is in configuring state - """ - # Handle pools for multiple tiers - tiers = self._db_api.storage_tier_get_by_cluster(self.cluster_db_uuid) - ceph_tiers = [t for t in tiers if t.type == constants.SB_TIER_TYPE_CEPH] - ceph_backends = self._db_api.storage_ceph_get_list() - - for t in ceph_tiers: - # Get corresponding ceph backend for the tier, if any - bk = None - for bk in ceph_backends: - if t.forbackendid == bk.id: - break - - # Get pool replication parameters - pool_size, pool_min_size = StorageBackendConfig.get_ceph_pool_replication(self._db_api, bk) - if bk and ceph_backend and bk.name == ceph_backend.name: - # Override replication - pool_size = new_pool_size if new_pool_size else pool_size - pool_min_size = new_pool_min_size if new_pool_min_size else pool_min_size - - # Configure tier OSD pools - if t.uuid == self.primary_tier_uuid: - # This is primary tier - # In case we're updating pool_size to a different value than - # default. Just update pool size for ceph's default pool 'rbd' - # as well - try: - self._configure_primary_tier_pool( - {'pool_name': constants.CEPH_POOL_RBD_NAME, - 'pg_num': constants.CEPH_POOL_RBD_PG_NUM, - 'pgp_num': constants.CEPH_POOL_RBD_PGP_NUM}, - pool_size, - pool_min_size) - except exception.CephFailure: - pass - - # Handle primary tier pools (cinder/glance/swift/ephemeral) - for pool in CEPH_POOLS: - # TODO(rchurch): The following is added for R3->R4 upgrades. Can we - # remove this for R5? Or is there some R3->R4->R5 need to keep this - # around. - try: - self.update_ceph_object_pool_name(pool) - except exception.CephFailure: - pass - - self._configure_primary_tier_pool(pool, pool_size, - pool_min_size) - else: - try: - self._configure_secondary_tier_pools(t, pool_size, - pool_min_size) - except exception.CephPoolRulesetFailure as e: - LOG.info("Cannot add pools: %s" % e) - except exception.CephFailure as e: - LOG.info("Cannot add pools: %s" % e) - def _update_db_capabilities(self, bk, new_storceph): # Avoid updating DB for all capabilities in new_storceph as we # don't manage them. Leave the callers deal with it. 
diff --git a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py index ca258b8c31..41f40f8e07 100644 --- a/sysinv/sysinv/sysinv/sysinv/conductor/manager.py +++ b/sysinv/sysinv/sysinv/sysinv/conductor/manager.py @@ -283,15 +283,6 @@ class ConductorManager(service.PeriodicService): self.dbapi.remotelogging_create(system_id_attribute_value) self.dbapi.ptp_create(system_id_attribute_value) - # set default storage_backend - values.update({'backend': constants.SB_TYPE_FILE, - 'name': constants.SB_DEFAULT_NAMES[constants.SB_TYPE_FILE], - 'state': constants.SB_STATE_CONFIGURED, - 'task': constants.SB_TASK_NONE, - 'services': None, - 'capabilities': {}}) - self.dbapi.storage_backend_create(values) - # populate service table for optional_service in constants.ALL_OPTIONAL_SERVICES: self.dbapi.service_create({'name': optional_service, @@ -4378,8 +4369,9 @@ class ConductorManager(service.PeriodicService): if availability == constants.AVAILABILITY_AVAILABLE: if imsg_dict.get(constants.SYSINV_AGENT_FIRST_REPORT): - # This should be run once after a boot + # This should be run once after a node boot self._clear_ceph_stor_state(ihost_uuid) + cceph.fix_crushmap(self.dbapi) config_uuid = imsg_dict['config_applied'] self._update_host_config_applied(context, ihost, config_uuid) @@ -5215,39 +5207,6 @@ class ConductorManager(service.PeriodicService): # Not sure yet what the proper response is here pass - def configure_osd_istor(self, context, istor_obj): - """Synchronously, have a conductor configure an OSD istor. - - Does the following tasks: - - Allocates an OSD. - - Creates or resizes an OSD pool as necessary. - - :param context: request context. - :param istor_obj: an istor object. - :returns: istor object, with updated osdid - """ - - if istor_obj['osdid']: - LOG.error("OSD already assigned: %s", str(istor_obj['osdid'])) - raise exception.SysinvException(_( - "Invalid method call: osdid already assigned: %s") % - str(istor_obj['osdid'])) - - # Create the OSD - response, body = self._ceph.osd_create(istor_obj['uuid'], body='json') - if not response.ok: - LOG.error("OSD create failed: %s", response.reason) - response.raise_for_status() - - # Update the osdid in the stor object - istor_obj['osdid'] = body['output']['osdid'] - - # TODO(CephPoolsDecouple): remove - if not utils.is_kubernetes_config(self.dbapi): - self._ceph.configure_osd_pools() - - return istor_obj - def restore_ceph_config(self, context, after_storage_enabled=False): """Restore Ceph configuration during Backup and Restore process. diff --git a/sysinv/sysinv/sysinv/sysinv/puppet/ceph.py b/sysinv/sysinv/sysinv/sysinv/puppet/ceph.py index 06af90ea71..6ada3b3303 100644 --- a/sysinv/sysinv/sysinv/sysinv/puppet/ceph.py +++ b/sysinv/sysinv/sysinv/sysinv/puppet/ceph.py @@ -47,6 +47,9 @@ class CephPuppet(openstack.OpenstackBasePuppet): ceph_mon_ips = StorageBackendConfig.get_ceph_mon_ip_addresses( self.dbapi) + if not ceph_mon_ips: + return {} # system configuration is not yet ready + controller_hosts = [constants.CONTROLLER_0_HOSTNAME, constants.CONTROLLER_1_HOSTNAME] mon_2_host = [mon['hostname'] for mon in self.dbapi.ceph_mon_get_list() if mon['hostname'] not in controller_hosts]
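Note (not part of the patch): the bootstrap-side changes in configassistant.py and populate_initial_config.py reduce default-backend setup to two sysinv API calls. A minimal sketch of that flow, assuming 'client' is a cgtsclient connection and 'controller' is the controller-0 ihost object obtained the same way the rest of populate_initial_config.py obtains them:

    # Sketch only; mirrors the calls added by this patch.
    def populate_default_storage_backend(client, controller):
        # controller-0 gets its Ceph monitor as soon as the host record exists;
        # the relaxed precondition in ceph_mon.py permits this before the host
        # is locked/online.
        client.sysinv.ceph_mon.create(ihost_uuid=controller.uuid)

        # The default Ceph backend is created pre-confirmed; the new
        # _set_defaults() logic in storage_ceph.py marks it configured on
        # AIO-SX, or sets the controller-reconfig task otherwise, instead of
        # applying runtime manifests.
        client.sysinv.storage_ceph.create(confirmed=True)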
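Note (not part of the patch): with configure_osd_istor removed from the conductor, _create() in storage.py now assigns the OSD id itself, and the new ceph.osd.create.erb template pins that id/uuid pair in Ceph before ceph-disk prepare runs. The id selection is simply "smallest unused non-negative integer"; a self-contained sketch of the same selection logic:

    def next_free_osd_id(existing_ids):
        # Smallest non-negative OSD id not already assigned; gaps left by
        # deleted OSDs are reused first, matching the list comprehension in
        # storage.py's _create().
        used = sorted(i for i in existing_ids if i is not None)
        if not used:
            return 0
        # The range runs one past the largest used id, so a free candidate
        # always exists.
        return next(i for i in range(0, used[-1] + 2) if i not in used)

    assert next_free_osd_id([]) == 0
    assert next_free_osd_id([0, 1, 3]) == 2   # reuse the gap
    assert next_free_osd_id([0, 1, 2]) == 3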
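Note (not part of the patch): the ceph_mon.py hunk only relaxes the lock/online precondition for controller-0 so its monitor can be created at bootstrap time. Expressed as a standalone predicate for illustration (literal strings stand in for the sysinv constants CONTROLLER_0_HOSTNAME, ADMIN_LOCKED and AVAILABILITY_ONLINE):

    def ceph_mon_create_allowed(host):
        # controller-0 may receive its monitor before it is locked/online
        # (bootstrap case); every other host must still be locked and online.
        if host['hostname'] == 'controller-0':
            return True
        return (host['administrative'] == 'locked' and
                host['availability'] == 'online')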