Merge "Make Ceph the default Storage Backend"

This commit is contained in:
Zuul 2019-04-17 21:48:29 +00:00 committed by Gerrit Code Review
commit 5070f6491b
20 changed files with 241 additions and 249 deletions

View File

@ -5077,6 +5077,15 @@ class ConfigAssistant():
} }
client.sysinv.route.create(**values) client.sysinv.route.create(**values)
def _populate_default_storage_backend(self, client, controller):
# Create the Ceph monitor for controller-0
values = {'ihost_uuid': controller.uuid}
client.sysinv.ceph_mon.create(**values)
# Create the Ceph default backend
values = {'confirmed': True}
client.sysinv.storage_ceph.create(**values)
def _populate_infrastructure_interface(self, client, controller): def _populate_infrastructure_interface(self, client, controller):
"""Configure the infrastructure interface(s)""" """Configure the infrastructure interface(s)"""
if not self.infrastructure_interface: if not self.infrastructure_interface:
@ -5349,6 +5358,7 @@ class ConfigAssistant():
# ceph_mon config requires controller host to be created # ceph_mon config requires controller host to be created
self._inventory_config_complete_wait(client, controller) self._inventory_config_complete_wait(client, controller)
self._populate_interface_config(client, controller) self._populate_interface_config(client, controller)
self._populate_default_storage_backend(client, controller)
except (KeystoneFail, SysInvFail) as e: except (KeystoneFail, SysInvFail) as e:
LOG.exception(e) LOG.exception(e)

View File

@ -19,8 +19,8 @@ import time
# constants in controllerconfig. When it is time to remove/deprecate these # constants in controllerconfig. When it is time to remove/deprecate these
# packages, classes OpenStack, Token and referenced constants need to be moved # packages, classes OpenStack, Token and referenced constants need to be moved
# to this standalone script. # to this standalone script.
from controllerconfig import ConfigFail
from controllerconfig.common import constants from controllerconfig.common import constants
from controllerconfig import ConfigFail
from controllerconfig import openstack from controllerconfig import openstack
from controllerconfig import sysinv_api as sysinv from controllerconfig import sysinv_api as sysinv
@ -711,6 +711,19 @@ def inventory_config_complete_wait(client, controller):
wait_pv_config(client, controller) wait_pv_config(client, controller)
def populate_default_storage_backend(client, controller):
    """Create the ceph-mon entry and the Ceph storage backend in sysinv.

    Nothing is done unless the module-level INITIAL_POPULATION flag is
    truthy. The monitor is created for the given controller's host uuid,
    followed by the Ceph backend with ``confirmed`` set to True.
    """
    if INITIAL_POPULATION:
        print("Populating ceph-mon config for controller-0...")
        client.sysinv.ceph_mon.create(ihost_uuid=controller.uuid)

        print("Populating ceph storage backend config...")
        client.sysinv.storage_ceph.create(confirmed=True)
def handle_invalid_input(): def handle_invalid_input():
raise Exception("Invalid input!\nUsage: <bootstrap-config-file> " raise Exception("Invalid input!\nUsage: <bootstrap-config-file> "
"[--system] [--network] [--service]") "[--system] [--network] [--service]")
@ -757,6 +770,7 @@ if __name__ == '__main__':
populate_docker_config(client) populate_docker_config(client)
controller = populate_controller_config(client) controller = populate_controller_config(client)
inventory_config_complete_wait(client, controller) inventory_config_complete_wait(client, controller)
populate_default_storage_backend(client, controller)
os.remove(config_file) os.remove(config_file)
if INITIAL_POPULATION: if INITIAL_POPULATION:
print("Successfully updated the initial system config.") print("Successfully updated the initial system config.")

View File

@ -1,8 +0,0 @@
# Fact: is_initial_cinder_ceph_config
#
# True while cinder ceph still needs to be configured, i.e. the flag path
# formed by appending '.initial_cinder_ceph_config_complete' to the
# hiera value of platform::params::config_path does not exist.
Facter.add("is_initial_cinder_ceph_config") do
  setcode do
    config_dir = Facter::Core::Execution.exec("hiera --config /etc/puppet/hiera.yaml platform::params::config_path")
    flag_file = config_dir + '.initial_cinder_ceph_config_complete'
    !File.exist?(flag_file)
  end
end

View File

@ -1,8 +0,0 @@
# Fact: is_initial_cinder_config
#
# True if this is the initial cinder config for this system, i.e. the flag
# path formed by appending '.initial_cinder_config_complete' to the hiera
# value of platform::params::config_path does not exist.
Facter.add("is_initial_cinder_config") do
  setcode do
    config_dir = Facter::Core::Execution.exec("hiera --config /etc/puppet/hiera.yaml platform::params::config_path")
    flag_file = config_dir + '.initial_cinder_config_complete'
    !File.exist?(flag_file)
  end
end

View File

@ -1,8 +0,0 @@
# Fact: is_initial_cinder_lvm_config
#
# True while cinder lvm still needs to be configured, i.e. the flag path
# formed by appending '.initial_cinder_lvm_config_complete' to the hiera
# value of platform::params::config_path does not exist.
Facter.add("is_initial_cinder_lvm_config") do
  setcode do
    config_dir = Facter::Core::Execution.exec("hiera --config /etc/puppet/hiera.yaml platform::params::config_path")
    flag_file = config_dir + '.initial_cinder_lvm_config_complete'
    !File.exist?(flag_file)
  end
end

View File

@ -1,7 +0,0 @@
# Fact: is_node_cinder_lvm_config
#
# True while cinder LVM still needs to be configured on the current node,
# i.e. the per-node completion flag file does not exist.
Facter.add("is_node_cinder_lvm_config") do
  setcode do
    flag_file = '/etc/platform/.node_cinder_lvm_config_complete'
    !File.exist?(flag_file)
  end
end

View File

@ -153,11 +153,11 @@ class platform::ceph::post
} }
if $service_enabled { if $service_enabled {
# Ceph configuration on this node is done
file { $node_ceph_configured_flag: file { $node_ceph_configured_flag:
ensure => present ensure => present
} }
} }
} }
@ -169,8 +169,8 @@ class platform::ceph::monitor
if $service_enabled { if $service_enabled {
if $system_type == 'All-in-one' and 'duplex' in $system_mode { if $system_type == 'All-in-one' and 'duplex' in $system_mode {
if str2bool($::is_controller_active) { if str2bool($::is_standalone_controller) {
# Ceph mon is configured on a DRBD partition, on the active controller, # Ceph mon is configured on a DRBD partition,
# when 'ceph' storage backend is added in sysinv. # when 'ceph' storage backend is added in sysinv.
# Then SM takes care of starting ceph after manifests are applied. # Then SM takes care of starting ceph after manifests are applied.
$configure_ceph_mon = true $configure_ceph_mon = true
@ -236,6 +236,31 @@ class platform::ceph::monitor
# ensure configuration is complete before creating monitors # ensure configuration is complete before creating monitors
Class['::ceph'] -> Ceph::Mon <| |> Class['::ceph'] -> Ceph::Mon <| |>
# ensure we load the crushmap at first unlock
if $system_type == 'All-in-one' and str2bool($::is_standalone_controller) {
if 'duplex' in $system_mode {
$crushmap_txt = '/etc/sysinv/crushmap-controller-model.txt'
} else {
$crushmap_txt = '/etc/sysinv/crushmap-aio-sx.txt'
}
$crushmap_bin = '/etc/sysinv/crushmap.bin'
Ceph::Mon <| |>
-> exec { 'Compile crushmap':
command => "crushtool -c ${crushmap_txt} -o ${crushmap_bin}",
onlyif => "test ! -f ${crushmap_bin}",
logoutput => true,
}
-> exec { 'Set crushmap':
command => "ceph osd setcrushmap -i ${crushmap_bin}",
unless => 'ceph osd crush rule list --format plain | grep -e "storage_tier_ruleset"',
logoutput => true,
}
-> Platform_ceph_osd <| |>
}
# Ensure networking is up before Monitors are configured
Anchor['platform::networking'] -> Ceph::Mon <| |>
# default configuration for all ceph monitor resources # default configuration for all ceph monitor resources
Ceph::Mon { Ceph::Mon {
fsid => $cluster_uuid, fsid => $cluster_uuid,
@ -248,33 +273,10 @@ class platform::ceph::monitor
public_addr => $floating_mon_ip, public_addr => $floating_mon_ip,
} }
if (str2bool($::is_controller_active) and # On AIO-DX there is a single, floating, Ceph monitor backed by DRBD.
str2bool($::is_initial_cinder_ceph_config) and # Therefore DRBD must be up before Ceph monitor is configured
!str2bool($::is_standalone_controller)) { Drbd::Resource <| |> -> Ceph::Mon <| |>
# When we configure ceph after both controllers are active,
# we need to stop the monitor, unmount the monitor partition
# and set the drbd role to secondary, so that the handoff to
# SM is done properly once we swact to the standby controller.
# TODO: Remove this once SM supports in-service config reload.
Ceph::Mon <| |>
-> exec { 'Stop Ceph monitor':
command =>'/etc/init.d/ceph stop mon',
onlyif => '/etc/init.d/ceph status mon',
logoutput => true,
}
-> exec { 'umount ceph-mon partition':
command => "umount ${mon_mountpoint}",
onlyif => "mount | grep -q ${mon_mountpoint}",
logoutput => true,
}
-> exec { 'Set cephmon secondary':
command => 'drbdadm secondary drbd-cephmon',
unless => "drbdadm role drbd-cephmon | egrep '^Secondary'",
logoutput => true,
}
}
} else { } else {
if $::hostname == $mon_0_host { if $::hostname == $mon_0_host {
ceph::mon { $mon_0_host: ceph::mon { $mon_0_host:
@ -295,8 +297,7 @@ class platform::ceph::monitor
} }
} }
define osd_crush_location(
define platform_ceph_osd(
$osd_id, $osd_id,
$osd_uuid, $osd_uuid,
$disk_path, $disk_path,
@ -311,12 +312,28 @@ define platform_ceph_osd(
"osd.${$osd_id}/crush_location": value => "root=${tier_name}-tier host=${$::platform::params::hostname}-${$tier_name}"; "osd.${$osd_id}/crush_location": value => "root=${tier_name}-tier host=${$::platform::params::hostname}-${$tier_name}";
} }
} }
file { "/var/lib/ceph/osd/ceph-${osd_id}": }
define platform_ceph_osd(
$osd_id,
$osd_uuid,
$disk_path,
$data_path,
$journal_path,
$tier_name,
) {
Anchor['platform::networking'] # Make sure networking is up before running ceph commands
-> file { "/var/lib/ceph/osd/ceph-${osd_id}":
ensure => 'directory', ensure => 'directory',
owner => 'root', owner => 'root',
group => 'root', group => 'root',
mode => '0755', mode => '0755',
} }
-> exec { "ceph osd create ${osd_uuid} ${osd_id}":
logoutput => true,
command => template('platform/ceph.osd.create.erb'),
}
-> ceph::osd { $disk_path: -> ceph::osd { $disk_path:
uuid => $osd_uuid, uuid => $osd_uuid,
} }
@ -351,8 +368,13 @@ class platform::ceph::osds(
mode => '0755', mode => '0755',
} }
# Ensure ceph.conf is complete before configuring OSDs
Class['::ceph'] -> Platform_ceph_osd <| |>
# Journal disks need to be prepared before the OSDs are configured # Journal disks need to be prepared before the OSDs are configured
Platform_ceph_journal <| |> -> Platform_ceph_osd <| |> Platform_ceph_journal <| |> -> Platform_ceph_osd <| |>
# Crush locations in ceph.conf need to be set before the OSDs are configured
Osd_crush_location <| |> -> Platform_ceph_osd <| |>
# default configuration for all ceph object resources # default configuration for all ceph object resources
Ceph::Osd { Ceph::Osd {
@ -360,6 +382,7 @@ class platform::ceph::osds(
cluster_uuid => $cluster_uuid, cluster_uuid => $cluster_uuid,
} }
create_resources('osd_crush_location', $osd_config)
create_resources('platform_ceph_osd', $osd_config) create_resources('platform_ceph_osd', $osd_config)
create_resources('platform_ceph_journal', $journal_config) create_resources('platform_ceph_journal', $journal_config)
} }
@ -479,6 +502,7 @@ class platform::ceph::runtime_base {
class platform::ceph::runtime_osds { class platform::ceph::runtime_osds {
include ::ceph::params include ::ceph::params
include ::platform::ceph
include ::platform::ceph::osds include ::platform::ceph::osds
# Since this is runtime we have to avoid checking status of Ceph while we # Since this is runtime we have to avoid checking status of Ceph while we

View File

@ -445,16 +445,13 @@ class platform::drbd::cephmon ()
$system_mode = $::platform::params::system_mode $system_mode = $::platform::params::system_mode
$system_type = $::platform::params::system_type $system_type = $::platform::params::system_type
#TODO: This will change once we remove the native cinder service if str2bool($::is_standalone_controller) and ! str2bool($::is_node_ceph_configured) {
if (str2bool($::is_initial_config_primary) or
(str2bool($::is_controller_active) and str2bool($::is_initial_cinder_ceph_config))
){
# Active controller, first time configuration. # Active controller, first time configuration.
$drbd_primary = true $drbd_primary = true
$drbd_initial = true $drbd_initial = true
$drbd_automount = true $drbd_automount = true
} elsif str2bool($::is_standalone_controller){ } elsif str2bool($::is_standalone_controller) {
# Active standalone controller, successive reboots. # Active standalone controller, successive reboots.
$drbd_primary = true $drbd_primary = true
$drbd_initial = undef $drbd_initial = undef
@ -490,9 +487,9 @@ class platform::drbd(
$service_enable = false, $service_enable = false,
$service_ensure = 'stopped', $service_ensure = 'stopped',
) { ) {
if (str2bool($::is_initial_config_primary) if (str2bool($::is_initial_config_primary) or str2bool($::is_standalone_controller)
){ ){
# Enable DRBD at config_controller # Enable DRBD on standalone
class { '::drbd': class { '::drbd':
service_enable => true, service_enable => true,
service_ensure => 'running', service_ensure => 'running',
@ -553,41 +550,55 @@ class platform::drbd::runtime {
} }
} }
# Declares the ::drbd class with its service enabled and ensured running.
# Kept as a separate class so that the platform::drbd::*::runtime classes
# can each pull it in with a plain include.
class platform::drbd::runtime_service_enable {
class { '::drbd':
service_enable => true,
service_ensure => 'running'
}
}
class platform::drbd::pgsql::runtime { class platform::drbd::pgsql::runtime {
include ::platform::drbd::params include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::pgsql include ::platform::drbd::pgsql
} }
class platform::drbd::cgcs::runtime { class platform::drbd::cgcs::runtime {
include ::platform::drbd::params include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::cgcs include ::platform::drbd::cgcs
} }
class platform::drbd::extension::runtime { class platform::drbd::extension::runtime {
include ::platform::drbd::params include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::extension include ::platform::drbd::extension
} }
class platform::drbd::patch_vault::runtime { class platform::drbd::patch_vault::runtime {
include ::platform::drbd::params include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::patch_vault include ::platform::drbd::patch_vault
} }
class platform::drbd::etcd::runtime { class platform::drbd::etcd::runtime {
include ::platform::drbd::params include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::etcd include ::platform::drbd::etcd
} }
class platform::drbd::dockerdistribution::runtime { class platform::drbd::dockerdistribution::runtime {
include ::platform::drbd::params include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::dockerdistribution include ::platform::drbd::dockerdistribution
} }
class platform::drbd::cephmon::runtime { class platform::drbd::cephmon::runtime {
include ::platform::drbd::params include ::platform::drbd::params
include ::platform::drbd::runtime_service_enable
include ::platform::drbd::cephmon include ::platform::drbd::cephmon
} }

View File

@ -0,0 +1,52 @@
/bin/true # puppet requires this for correct template parsing
# Purpose: pin a specific OSD id to its corresponding UUID.
# ceph-disk prepare does not accept a Ceph OSD id as a CLI parameter.
# Therefore, the OSD with the desired OSD id and UUID must exist before
# the puppet ceph module executes ceph-disk prepare.
#
# Exit status: 0 when Ceph is unreachable, when the OSD already exists with
# the expected UUID, or after a successful create; otherwise the exit code
# of the failing 'ceph osd rm' or 'ceph osd create' command.
set -x
# Both values are filled in when the template is rendered
osd_id=<%= @osd_id %>
osd_uuid=<%= @osd_uuid %>
# Ignore if Ceph is down, this case should only happen on DOR.
# The status query is bounded to 10 seconds so a dead cluster cannot hang us.
timeout 10 ceph -s
if [ $? -ne 0 ]; then
exit 0
fi
# Check if OSD exists and has the correct UUID.
# The id list is space-padded on both sides so the id is matched as a whole
# word rather than as a substring of another id.
osds=( $(ceph osd ls) )
if [[ " ${osds[@]} " =~ " ${osd_id} " ]]; then
# Get UUID, this is slower than osd ls as it also lists PGs with problems
# but is the only way to get the uuid of an OSD.
# The uuid is taken from the last field of the matching 'osd.<id>' line.
found_uuid=$(ceph osd dump | grep "^osd.${osd_id} " | awk '{print $NF}')
if [ "${found_uuid}" != "${osd_uuid}" ]; then
# At B&R ceph's crushmap is restored but, although OSDs are properly
# allocated to their hosts in the tree, crushmap does not store
# OSD UUIDs. Therefore, w/o osd_id and uuid match, when the OSD is
# prepared there is a chance that ceph-disk will create a new OSD
# that will no longer match the osd id in sysinv db. So, we have
# to remove OSDs that don't match UUIDs and recreate them with
# expected OSD ID and UUID so that ceph-disk does not get confused.
ceph osd rm ${osd_id}
RET=$?
if [ $RET -ne 0 ]; then
echo "Error removing osd ${osd_id}, exit code: ${RET}"
exit $RET
fi
else
# OSD exists and has the correct uuid, nothing to do
exit 0
fi
fi
# Create the OSD with desired id and uuid
# (reached when the id is absent, or was just removed for a UUID mismatch)
ceph osd create ${osd_uuid} ${osd_id}
RET=$?
if [ $RET -ne 0 ]; then
echo "Error creating osd ${osd_id}, exit code: ${RET}"
exit $RET
fi
set +x

View File

@ -11,8 +11,6 @@ from cgtsclient.common import base
from cgtsclient.common import constants from cgtsclient.common import constants
from cgtsclient.common import utils from cgtsclient.common import utils
from cgtsclient import exc from cgtsclient import exc
from cgtsclient.v1 import ceph_mon as ceph_mon_utils
from cgtsclient.v1 import ihost as ihost_utils
from cgtsclient.v1 import storage_ceph # noqa from cgtsclient.v1 import storage_ceph # noqa
from cgtsclient.v1 import storage_ceph_external # noqa from cgtsclient.v1 import storage_ceph_external # noqa
from cgtsclient.v1 import storage_external # noqa from cgtsclient.v1 import storage_external # noqa
@ -138,23 +136,6 @@ def _display_next_steps():
def backend_add(cc, backend, args): def backend_add(cc, backend, args):
backend = backend.replace('-', '_') backend = backend.replace('-', '_')
# add ceph mons to controllers
if backend == constants.SB_TYPE_CEPH:
# Controllers should always have monitors.
# Not finding a controller means it's not yet configured,
# so move forward.
try:
ihost = ihost_utils._find_ihost(cc, constants.CONTROLLER_0_HOSTNAME)
ceph_mon_utils.ceph_mon_add(cc, args, ihost.uuid)
except exc.CommandError:
pass
try:
ihost = ihost_utils._find_ihost(cc, constants.CONTROLLER_1_HOSTNAME)
ceph_mon_utils.ceph_mon_add(cc, args, ihost.uuid)
except exc.CommandError:
pass
# allowed storage_backend fields # allowed storage_backend fields
allowed_fields = ['name', 'services', 'confirmed', 'ceph_conf'] allowed_fields = ['name', 'services', 'confirmed', 'ceph_conf']

View File

@ -512,6 +512,10 @@ class AgentManager(service.PeriodicService):
msg_dict.update({constants.HOST_ACTION_STATE: msg_dict.update({constants.HOST_ACTION_STATE:
constants.HAS_REINSTALLED}) constants.HAS_REINSTALLED})
# Is this the first time since boot we are reporting to conductor?
msg_dict.update({constants.SYSINV_AGENT_FIRST_REPORT:
not os.path.exists(SYSINV_FIRST_REPORT_FLAG)})
try: try:
rpcapi.iplatform_update_by_ihost(context, rpcapi.iplatform_update_by_ihost(context,
host_uuid, host_uuid,
@ -916,10 +920,6 @@ class AgentManager(service.PeriodicService):
if iscsi_initiator_name is not None: if iscsi_initiator_name is not None:
imsg_dict.update({'iscsi_initiator_name': iscsi_initiator_name}) imsg_dict.update({'iscsi_initiator_name': iscsi_initiator_name})
# Is this the first time since boot we are reporting to conductor?
imsg_dict.update({constants.SYSINV_AGENT_FIRST_REPORT:
not os.path.exists(SYSINV_FIRST_REPORT_FLAG)})
self.platform_update_by_host(rpcapi, self.platform_update_by_host(rpcapi,
icontext, icontext,
self._ihost_uuid, self._ihost_uuid,

View File

@ -446,9 +446,10 @@ def _create(ceph_mon):
"replication is set to: %s'. Please update replication " "replication is set to: %s'. Please update replication "
"before configuring a monitor on a worker node." % supported_replication)) "before configuring a monitor on a worker node." % supported_replication))
# host must be locked and online # host must be locked and online unless this is controller-0
if (chost['availability'] != constants.AVAILABILITY_ONLINE or if (chost['hostname'] != constants.CONTROLLER_0_HOSTNAME and
chost['administrative'] != constants.ADMIN_LOCKED): (chost['availability'] != constants.AVAILABILITY_ONLINE or
chost['administrative'] != constants.ADMIN_LOCKED)):
raise wsme.exc.ClientSideError( raise wsme.exc.ClientSideError(
_("Host %s must be locked and online." % chost['hostname'])) _("Host %s must be locked and online." % chost['hostname']))

View File

@ -5295,7 +5295,7 @@ class HostController(rest.RestController):
pass pass
elif StorageBackendConfig.has_backend_configured( elif StorageBackendConfig.has_backend_configured(
pecan.request.dbapi, pecan.request.dbapi,
constants.CINDER_BACKEND_CEPH): constants.SB_TYPE_CEPH):
if utils.is_aio_simplex_system(pecan.request.dbapi): if utils.is_aio_simplex_system(pecan.request.dbapi):
# Check if host has enough OSDs configured for each tier # Check if host has enough OSDs configured for each tier
tiers = pecan.request.dbapi.storage_tier_get_all() tiers = pecan.request.dbapi.storage_tier_get_all()

View File

@ -471,23 +471,33 @@ class StorageController(rest.RestController):
except Exception as e: except Exception as e:
LOG.exception(e) LOG.exception(e)
raise raise
# Make sure that we are allowed to delete # Make sure that we are allowed to delete
_check_host(stor) _check_host(stor)
# Delete the stor if supported # Delete the stor if supported
ihost_id = stor['forihostid']
ihost = pecan.request.dbapi.ihost_get(ihost_id)
if stor.function == constants.STOR_FUNCTION_JOURNAL: if stor.function == constants.STOR_FUNCTION_JOURNAL:
# Host must be locked # Host must be locked
ihost_id = stor['forihostid']
ihost = pecan.request.dbapi.ihost_get(ihost_id)
if ihost['administrative'] != constants.ADMIN_LOCKED: if ihost['administrative'] != constants.ADMIN_LOCKED:
raise wsme.exc.ClientSideError(_("Host %s must be locked." % raise wsme.exc.ClientSideError(_("Host %s must be locked." %
ihost['hostname'])) ihost['hostname']))
self.delete_stor(stor_uuid) self.delete_stor(stor_uuid)
elif (stor.function == constants.STOR_FUNCTION_OSD and
stor.state == constants.SB_STATE_CONFIGURING_ON_UNLOCK):
# Host must be locked
if ihost['administrative'] != constants.ADMIN_LOCKED:
raise wsme.exc.ClientSideError(_("Host %s must be locked." %
ihost['hostname']))
self.delete_stor(stor_uuid)
else: else:
raise wsme.exc.ClientSideError(_( raise wsme.exc.ClientSideError(_(
"Deleting a Storage Function other than %s is not " "Deleting a Storage Function other than '%s' and '%s' in "
"supported on this setup") % constants.STOR_FUNCTION_JOURNAL) "state '%s' is not supported on this setup.") %
(constants.STOR_FUNCTION_JOURNAL,
constants.STOR_FUNCTION_OSD,
constants.SB_STATE_CONFIGURING_ON_UNLOCK))
def delete_stor(self, stor_uuid): def delete_stor(self, stor_uuid):
"""Delete a stor""" """Delete a stor"""
@ -497,10 +507,10 @@ class StorageController(rest.RestController):
try: try:
# The conductor will handle removing the stor, not all functions # The conductor will handle removing the stor, not all functions
# need special handling # need special handling
if stor.function == constants.STOR_FUNCTION_OSD: # if stor.function == constants.STOR_FUNCTION_OSD:
pecan.request.rpcapi.unconfigure_osd_istor(pecan.request.context, # pecan.request.rpcapi.unconfigure_osd_istor(pecan.request.context,
stor) # stor)
elif stor.function == constants.STOR_FUNCTION_JOURNAL: if stor.function == constants.STOR_FUNCTION_JOURNAL:
pecan.request.dbapi.istor_disable_journal(stor_uuid) pecan.request.dbapi.istor_disable_journal(stor_uuid)
# Now remove the stor from DB # Now remove the stor from DB
pecan.request.dbapi.istor_remove_disk_association(stor_uuid) pecan.request.dbapi.istor_remove_disk_association(stor_uuid)
@ -901,29 +911,21 @@ def _create(stor, iprofile=None):
"Invalid stor device type: only SSD and NVME devices are supported" "Invalid stor device type: only SSD and NVME devices are supported"
" for journal functions.")) " for journal functions."))
if osd_create is True:
# Get the next free OSD ID in the system
stors = pecan.request.dbapi.istor_get_list(sort_key='osdid', sort_dir='asc')
stors_ids = [s['osdid'] for s in stors if s['osdid'] is not None]
if stors_ids:
candidate_ids = [i for i in range(0, stors_ids[-1] + 2) if i not in stors_ids]
create_attrs['osdid'] = candidate_ids[0]
else:
create_attrs['osdid'] = 0
else:
create_attrs['osdid'] = None
new_stor = pecan.request.dbapi.istor_create(forihostid, new_stor = pecan.request.dbapi.istor_create(forihostid,
create_attrs) create_attrs)
# Create an osd associated with disk.
if osd_create is True:
try:
new_stor = pecan.request.rpcapi.configure_osd_istor(
pecan.request.context, new_stor)
except Exception as cpe:
LOG.exception(cpe)
# Delete the partially configured istor
pecan.request.dbapi.istor_destroy(new_stor.uuid)
raise wsme.exc.ClientSideError(_(
"Internal error: failed to create a storage object. "
"Make sure storage cluster is up and healthy."))
if iprofile:
new_stor = pecan.request.dbapi.istor_update(new_stor.uuid,
{'osdid': None})
else:
# Update the database record
new_stor.save(pecan.request.context)
# Associate the disk to db record # Associate the disk to db record
values = {'foristorid': new_stor.id} values = {'foristorid': new_stor.id}
pecan.request.dbapi.idisk_update(idisk_uuid, pecan.request.dbapi.idisk_update(idisk_uuid,

View File

@ -339,7 +339,6 @@ class StorageCephController(rest.RestController):
@wsme_pecan.wsexpose(None, types.uuid, status_code=204) @wsme_pecan.wsexpose(None, types.uuid, status_code=204)
def delete(self, storageceph_uuid): def delete(self, storageceph_uuid):
"""Delete a backend.""" """Delete a backend."""
return _delete(storageceph_uuid) return _delete(storageceph_uuid)
@ -696,14 +695,8 @@ def _apply_backend_changes(op, sb_obj):
services = api_helper.getListFromServices(sb_obj.as_dict()) services = api_helper.getListFromServices(sb_obj.as_dict())
if op == constants.SB_API_OP_CREATE: if op == constants.SB_API_OP_CREATE:
if sb_obj.name == constants.SB_DEFAULT_NAMES[ if sb_obj.name != constants.SB_DEFAULT_NAMES[
constants.SB_TYPE_CEPH]: constants.SB_TYPE_CEPH]:
# Apply manifests for primary tier
pecan.request.rpcapi.update_ceph_config(pecan.request.context,
sb_obj.uuid,
services)
else:
# Enable the service(s) use of the backend # Enable the service(s) use of the backend
if constants.SB_SVC_CINDER in services: if constants.SB_SVC_CINDER in services:
pecan.request.rpcapi.update_ceph_services( pecan.request.rpcapi.update_ceph_services(
@ -800,10 +793,21 @@ def _set_defaults(storage_ceph):
'kube_pool_gib': None, 'kube_pool_gib': None,
'object_gateway': False, 'object_gateway': False,
} }
sc = api_helper.set_backend_data(storage_ceph, sc = api_helper.set_backend_data(storage_ceph,
defaults, defaults,
CAPABILITIES, CAPABILITIES,
constants.SB_CEPH_SVCS_SUPPORTED) constants.SB_CEPH_SVCS_SUPPORTED)
# Ceph is our default storage backend and is added at configuration;
# set state and task accordingly.
if sc['name'] == constants.SB_DEFAULT_NAMES[constants.SB_TYPE_CEPH]:
sc['state'] = constants.SB_STATE_CONFIGURED
if utils.is_aio_simplex_system(pecan.request.dbapi):
sc['task'] = None
else:
sc['task'] = constants.SB_TASK_RECONFIG_CONTROLLER
return sc return sc

View File

@ -24,6 +24,8 @@ import pecan
import os import os
import requests import requests
from sysinv.api.controllers.v1.utils import is_aio_system
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
@ -704,11 +706,36 @@ class CephApiOperator(object):
def fix_crushmap(dbapi=None): def fix_crushmap(dbapi=None):
""" Set Ceph's CRUSH Map based on storage model """ """ Set Ceph's CRUSH Map based on storage model """
def _create_crushmap_flag_file():
try:
open(crushmap_flag_file, "w").close()
except IOError as e:
LOG.warn(_('Failed to create flag file: {}. '
'Reason: {}').format(crushmap_flag_file, e))
if not dbapi: if not dbapi:
dbapi = pecan.request.dbapi dbapi = pecan.request.dbapi
crushmap_flag_file = os.path.join(constants.SYSINV_CONFIG_PATH, crushmap_flag_file = os.path.join(constants.SYSINV_CONFIG_PATH,
constants.CEPH_CRUSH_MAP_APPLIED) constants.CEPH_CRUSH_MAP_APPLIED)
if not os.path.isfile(crushmap_flag_file): if not os.path.isfile(crushmap_flag_file):
_operator = CephApiOperator()
if not is_aio_system(dbapi):
# At least two monitors have to be running on a standard deployment,
# otherwise don't even try to load the crushmap.
active_mons, required_mons, __ = _operator.get_monitors_status(dbapi)
if required_mons > active_mons:
LOG.info("Not enough monitors yet available to fix crushmap.")
return False
# Crushmap may be already loaded through puppet, avoid doing it twice.
default_ceph_tier_name = constants.SB_TIER_DEFAULT_NAMES[
constants.SB_TIER_TYPE_CEPH] + constants.CEPH_CRUSH_TIER_SUFFIX
rule_is_present, __, __ = _operator._crush_rule_status(default_ceph_tier_name)
if rule_is_present:
_create_crushmap_flag_file()
return False
stor_model = get_ceph_storage_model(dbapi) stor_model = get_ceph_storage_model(dbapi)
if stor_model == constants.CEPH_AIO_SX_MODEL: if stor_model == constants.CEPH_AIO_SX_MODEL:
crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt" crushmap_txt = "/etc/sysinv/crushmap-aio-sx.txt"
@ -731,13 +758,10 @@ def fix_crushmap(dbapi=None):
# May not be critical, depends on where this is called. # May not be critical, depends on where this is called.
reason = "Error: %s Output: %s" % (str(e), e.output) reason = "Error: %s Output: %s" % (str(e), e.output)
raise exception.CephCrushMapNotApplied(reason=reason) raise exception.CephCrushMapNotApplied(reason=reason)
try: _create_crushmap_flag_file()
open(crushmap_flag_file, "w").close()
except IOError as e:
LOG.warn(_('Failed to create flag file: {}. '
'Reason: {}').format(crushmap_flag_file, e))
return True return True
return False
def get_ceph_storage_model(dbapi=None): def get_ceph_storage_model(dbapi=None):

View File

@ -220,6 +220,7 @@ class StorageBackendConfig(object):
} }
ceph_mons = dbapi.ceph_mon_get_list() ceph_mons = dbapi.ceph_mon_get_list()
ceph_mon = None
for ceph_mon in ceph_mons: for ceph_mon in ceph_mons:
if ceph_mon['hostname'] == constants.CONTROLLER_0_HOSTNAME: if ceph_mon['hostname'] == constants.CONTROLLER_0_HOSTNAME:
targets.update({'%s-%s' % (constants.CONTROLLER_0_HOSTNAME, targets.update({'%s-%s' % (constants.CONTROLLER_0_HOSTNAME,
@ -231,8 +232,6 @@ class StorageBackendConfig(object):
targets.update({'%s-%s' % (ceph_mon['hostname'], targets.update({'%s-%s' % (ceph_mon['hostname'],
network_type): 'ceph-mon-2-ip'}) network_type): 'ceph-mon-2-ip'})
ceph_mon['ceph_mon_gib'] = ceph_mons[0]['ceph_mon_gib']
results = {} results = {}
addrs = dbapi.addresses_get_all() addrs = dbapi.addresses_get_all()
for addr in addrs: for addr in addrs:

View File

@ -124,7 +124,6 @@ class CephOperator(object):
# cluster UUID value that is valid and consistent for the state of the # cluster UUID value that is valid and consistent for the state of the
# installation. Also make sure that we have a cluster DB entry # installation. Also make sure that we have a cluster DB entry
# established # established
LOG.debug("_init_db_cluster_and_tier: Reteiving cluster record")
try: try:
self._db_cluster = self._db_api.clusters_get_all( self._db_cluster = self._db_api.clusters_get_all(
type=constants.CINDER_BACKEND_CEPH)[0] type=constants.CINDER_BACKEND_CEPH)[0]
@ -132,7 +131,7 @@ class CephOperator(object):
# Retrieve ceph cluster fsid and update database # Retrieve ceph cluster fsid and update database
fsid = self._get_fsid() fsid = self._get_fsid()
if uuidutils.is_uuid_like(fsid): if uuidutils.is_uuid_like(fsid):
LOG.debug("Update cluster record: fsid=%s." % fsid) LOG.info("Update cluster record: fsid=%s." % fsid)
self._db_cluster.cluster_uuid = fsid self._db_cluster.cluster_uuid = fsid
self._db_api.cluster_update( self._db_api.cluster_update(
self.cluster_db_uuid, self.cluster_db_uuid,
@ -155,7 +154,7 @@ class CephOperator(object):
# Try to use ceph cluster fsid # Try to use ceph cluster fsid
fsid = self._get_fsid() fsid = self._get_fsid()
LOG.info("Create new cluster record: fsid=%s." % fsid) LOG.info("Create new ceph cluster record: fsid=%s." % fsid)
# Create the default primary cluster # Create the default primary cluster
self._db_cluster = self._db_api.cluster_create( self._db_cluster = self._db_api.cluster_create(
{'uuid': fsid if uuidutils.is_uuid_like(fsid) else str(uuid.uuid4()), {'uuid': fsid if uuidutils.is_uuid_like(fsid) else str(uuid.uuid4()),
@ -165,6 +164,7 @@ class CephOperator(object):
'system_id': isystem.id}) 'system_id': isystem.id})
# Create the default primary ceph storage tier # Create the default primary ceph storage tier
LOG.info("Create primary ceph tier record.")
self._db_primary_tier = self._db_api.storage_tier_create( self._db_primary_tier = self._db_api.storage_tier_create(
{'forclusterid': self.cluster_id, {'forclusterid': self.cluster_id,
'name': constants.SB_TIER_DEFAULT_NAMES[constants.SB_TIER_TYPE_CEPH], 'name': constants.SB_TIER_DEFAULT_NAMES[constants.SB_TIER_TYPE_CEPH],
@ -831,67 +831,6 @@ class CephOperator(object):
name=rule_name, reason=body['status']) name=rule_name, reason=body['status'])
raise e raise e
# TODO(CephPoolsDecouple): remove
def configure_osd_pools(self, ceph_backend=None, new_pool_size=None, new_pool_min_size=None):
    """Create or resize all of the osd pools as needed

    ceph backend could be 2nd backend which is in configuring state

    :param ceph_backend: optional ceph backend object; when a tier's backend
                         has the same name, its replication values are
                         overridden by the two parameters below.
    :param new_pool_size: replacement pool size for ``ceph_backend``
                          (ignored when falsy).
    :param new_pool_min_size: replacement pool min size for ``ceph_backend``
                              (ignored when falsy).
    """
    # Handle pools for multiple tiers
    tiers = self._db_api.storage_tier_get_by_cluster(self.cluster_db_uuid)
    ceph_tiers = [t for t in tiers if t.type == constants.SB_TIER_TYPE_CEPH]
    ceph_backends = self._db_api.storage_ceph_get_list()

    for t in ceph_tiers:
        # Get corresponding ceph backend for the tier, if any.
        # A plain "for bk in ...: break" search would leave bk bound to the
        # last backend in the list when nothing matches; next() with a
        # default guarantees None for a tier without a backend.
        bk = next((b for b in ceph_backends if t.forbackendid == b.id), None)

        # Get pool replication parameters
        pool_size, pool_min_size = StorageBackendConfig.get_ceph_pool_replication(self._db_api, bk)
        if bk and ceph_backend and bk.name == ceph_backend.name:
            # Override replication
            pool_size = new_pool_size or pool_size
            pool_min_size = new_pool_min_size or pool_min_size

        # Configure tier OSD pools
        if t.uuid == self.primary_tier_uuid:
            # This is primary tier
            # In case we're updating pool_size to a different value than
            # default. Just update pool size for ceph's default pool 'rbd'
            # as well
            try:
                self._configure_primary_tier_pool(
                    {'pool_name': constants.CEPH_POOL_RBD_NAME,
                     'pg_num': constants.CEPH_POOL_RBD_PG_NUM,
                     'pgp_num': constants.CEPH_POOL_RBD_PGP_NUM},
                    pool_size,
                    pool_min_size)
            except exception.CephFailure as e:
                # Best effort: keep going, but leave a trace in the log.
                LOG.info("Cannot configure pool %s: %s" %
                         (constants.CEPH_POOL_RBD_NAME, e))

            # Handle primary tier pools (cinder/glance/swift/ephemeral)
            for pool in CEPH_POOLS:
                # TODO(rchurch): The following is added for R3->R4 upgrades. Can we
                # remove this for R5? Or is there some R3->R4->R5 need to keep this
                # around.
                try:
                    self.update_ceph_object_pool_name(pool)
                except exception.CephFailure as e:
                    # Best effort: a failed rename must not block pool setup.
                    LOG.info("Cannot update object pool name: %s" % e)

                self._configure_primary_tier_pool(pool, pool_size,
                                                  pool_min_size)
        else:
            try:
                self._configure_secondary_tier_pools(t, pool_size,
                                                     pool_min_size)
            except (exception.CephPoolRulesetFailure,
                    exception.CephFailure) as e:
                LOG.info("Cannot add pools: %s" % e)
def _update_db_capabilities(self, bk, new_storceph): def _update_db_capabilities(self, bk, new_storceph):
# Avoid updating DB for all capabilities in new_storceph as we # Avoid updating DB for all capabilities in new_storceph as we
# don't manage them. Leave the callers deal with it. # don't manage them. Leave the callers deal with it.

View File

@ -283,15 +283,6 @@ class ConductorManager(service.PeriodicService):
self.dbapi.remotelogging_create(system_id_attribute_value) self.dbapi.remotelogging_create(system_id_attribute_value)
self.dbapi.ptp_create(system_id_attribute_value) self.dbapi.ptp_create(system_id_attribute_value)
# set default storage_backend
values.update({'backend': constants.SB_TYPE_FILE,
'name': constants.SB_DEFAULT_NAMES[constants.SB_TYPE_FILE],
'state': constants.SB_STATE_CONFIGURED,
'task': constants.SB_TASK_NONE,
'services': None,
'capabilities': {}})
self.dbapi.storage_backend_create(values)
# populate service table # populate service table
for optional_service in constants.ALL_OPTIONAL_SERVICES: for optional_service in constants.ALL_OPTIONAL_SERVICES:
self.dbapi.service_create({'name': optional_service, self.dbapi.service_create({'name': optional_service,
@ -4378,8 +4369,9 @@ class ConductorManager(service.PeriodicService):
if availability == constants.AVAILABILITY_AVAILABLE: if availability == constants.AVAILABILITY_AVAILABLE:
if imsg_dict.get(constants.SYSINV_AGENT_FIRST_REPORT): if imsg_dict.get(constants.SYSINV_AGENT_FIRST_REPORT):
# This should be run once after a boot # This should be run once after a node boot
self._clear_ceph_stor_state(ihost_uuid) self._clear_ceph_stor_state(ihost_uuid)
cceph.fix_crushmap(self.dbapi)
config_uuid = imsg_dict['config_applied'] config_uuid = imsg_dict['config_applied']
self._update_host_config_applied(context, ihost, config_uuid) self._update_host_config_applied(context, ihost, config_uuid)
@ -5215,39 +5207,6 @@ class ConductorManager(service.PeriodicService):
# Not sure yet what the proper response is here # Not sure yet what the proper response is here
pass pass
def configure_osd_istor(self, context, istor_obj):
    """Synchronously, have a conductor configure an OSD istor.

    Does the following tasks:
    - Allocates an OSD.
    - Creates or resizes an OSD pool as necessary.

    :param context: request context.
    :param istor_obj: an istor object.
    :returns: istor object, with updated osdid
    :raises SysinvException: if the istor already has an osdid assigned.
    """
    osdid = istor_obj['osdid']
    # Compare against None rather than truthiness: 0 is a valid OSD id and
    # must be treated as "already assigned".
    # NOTE(review): assumes an unassigned osdid is always None - confirm.
    if osdid is not None:
        LOG.error("OSD already assigned: %s", str(osdid))
        raise exception.SysinvException(_(
            "Invalid method call: osdid already assigned: %s") %
            str(osdid))

    # Create the OSD
    response, body = self._ceph.osd_create(istor_obj['uuid'], body='json')
    if not response.ok:
        LOG.error("OSD create failed: %s", response.reason)
        response.raise_for_status()

    # Update the osdid in the stor object
    istor_obj['osdid'] = body['output']['osdid']

    # TODO(CephPoolsDecouple): remove
    if not utils.is_kubernetes_config(self.dbapi):
        self._ceph.configure_osd_pools()

    return istor_obj
def restore_ceph_config(self, context, after_storage_enabled=False): def restore_ceph_config(self, context, after_storage_enabled=False):
"""Restore Ceph configuration during Backup and Restore process. """Restore Ceph configuration during Backup and Restore process.

View File

@ -47,6 +47,9 @@ class CephPuppet(openstack.OpenstackBasePuppet):
ceph_mon_ips = StorageBackendConfig.get_ceph_mon_ip_addresses( ceph_mon_ips = StorageBackendConfig.get_ceph_mon_ip_addresses(
self.dbapi) self.dbapi)
if not ceph_mon_ips:
return {} # system configuration is not yet ready
controller_hosts = [constants.CONTROLLER_0_HOSTNAME, constants.CONTROLLER_1_HOSTNAME] controller_hosts = [constants.CONTROLLER_0_HOSTNAME, constants.CONTROLLER_1_HOSTNAME]
mon_2_host = [mon['hostname'] for mon in self.dbapi.ceph_mon_get_list() if mon_2_host = [mon['hostname'] for mon in self.dbapi.ceph_mon_get_list() if
mon['hostname'] not in controller_hosts] mon['hostname'] not in controller_hosts]