Restore Ceph during system restore
On AIO and standard setups, the Ceph OSDs run on the controller nodes. During a restore, the backed-up crushmap has to be set once Ceph is operational and before the OSDs are created.

Change-Id: Id2a7c666fa3670c460f412cfc0184ad0a9f6ccff
Story: 2004761
Task: 30522
Signed-off-by: Wei Zhou <wei.zhou@windriver.com>
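The ordering this enforces can be sketched as follows. This is an illustrative outline only, not code from the change; the paths are placeholders, and the `ceph osd setcrushmap` call stands in for however the platform actually loads the compiled map.

import os
import subprocess

# Placeholder paths, for illustration only.
CRUSHMAP_BACKUP = '/opt/platform/sysinv/<version>/crushmap.bin.backup'
CRUSHMAP_BIN = '/etc/sysinv/crushmap.bin'

def restore_crushmap_then_create_osds(create_osds):
    """Sketch of the restore ordering: once the Ceph monitors are up, put the
    backed-up crushmap in place and load it before any OSD is created."""
    if os.path.isfile(CRUSHMAP_BACKUP):
        subprocess.check_call(['cp', '-f', CRUSHMAP_BACKUP, CRUSHMAP_BIN])
        os.remove(CRUSHMAP_BACKUP)
    subprocess.check_call(['ceph', 'osd', 'setcrushmap', '-i', CRUSHMAP_BIN])
    create_osds()  # only now are the OSDs (re)created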
commit a03cbf1b67
parent 686d10c9cf
@@ -236,13 +236,20 @@ class platform::ceph::monitor

   # ensure we load the crushmap at first unlock
   if $system_type == 'All-in-one' and str2bool($::is_standalone_controller) {
+    $software_version = $::platform::params::software_version
+
     if 'duplex' in $system_mode {
       $crushmap_txt = '/etc/sysinv/crushmap-controller-model.txt'
     } else {
       $crushmap_txt = '/etc/sysinv/crushmap-aio-sx.txt'
     }
     $crushmap_bin = '/etc/sysinv/crushmap.bin'
+    $crushmap_backup_bin = "/opt/platform/sysinv/${software_version}/crushmap.bin.backup"
     Ceph::Mon <| |>
+    -> exec { 'Copy crushmap if backup exists':
+      command => "cp -f ${crushmap_backup_bin} ${crushmap_bin} && rm -f ${crushmap_backup_bin}",
+      onlyif  => "test -f ${crushmap_backup_bin}",
+    }
     -> exec { 'Compile crushmap':
       command => "crushtool -c ${crushmap_txt} -o ${crushmap_bin}",
       onlyif  => "test ! -f ${crushmap_bin}",
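The exec chain above prefers a backed-up binary crushmap over compiling the model text file. A rough Python equivalent of that decision, assuming the same file paths as the manifest (illustrative only, not part of the change):

import os
import shutil
import subprocess

def prepare_crushmap(crushmap_txt, crushmap_bin, crushmap_backup_bin):
    """Mirror of the manifest logic: restore the backup crushmap if the
    restore procedure left one behind, otherwise compile the model file."""
    if os.path.isfile(crushmap_backup_bin):
        # 'Copy crushmap if backup exists'
        shutil.copyfile(crushmap_backup_bin, crushmap_bin)
        os.remove(crushmap_backup_bin)
    if not os.path.isfile(crushmap_bin):
        # 'Compile crushmap'
        subprocess.check_call(['crushtool', '-c', crushmap_txt, '-o', crushmap_bin])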
@@ -361,7 +368,12 @@ class platform::ceph::osds(
   $osd_config = {},
   $journal_config = {},
 ) inherits ::platform::ceph::params {
+  $system_type = $::platform::params::system_type

+  # When applying controller manifest during restore the backed-up
+  # controller-0 hieradata contains osd info in osd_config but
+  # osd shouldn't be created as Ceph is down.
+  if $system_type == 'All-in-one' or ! str2bool($::is_standalone_controller) {
   file { '/var/lib/ceph/osd':
     ensure => 'directory',
     path   => '/var/lib/ceph/osd',
@@ -387,6 +399,7 @@ class platform::ceph::osds(
     create_resources('osd_crush_location', $osd_config)
     create_resources('platform_ceph_osd', $osd_config)
     create_resources('platform_ceph_journal', $journal_config)
+  }
 }

 class platform::ceph::haproxy
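The guard added to platform::ceph::osds skips OSD creation exactly when a standalone, non-AIO controller is being restored: its hieradata still carries osd_config, but Ceph is down at that point. A hedged sketch of the condition:

def should_create_osds(system_type, is_standalone_controller):
    """Sketch of the manifest guard above: create OSDs on AIO systems or on
    nodes that are not a standalone controller; skip them otherwise."""
    return system_type == 'All-in-one' or not is_standalone_controller

# e.g. a standard-setup controller-0 during restore:
# should_create_osds('Standard', True) -> False, so no OSDs are created yet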
@@ -4510,8 +4510,7 @@ class HostController(rest.RestController):
         else:
             return False

-    @staticmethod
-    def _update_add_ceph_state():
+    def _update_add_ceph_state(self):
         api = pecan.request.dbapi

         backend = StorageBackendConfig.get_configuring_backend(api)
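Dropping @staticmethod and taking self is what lets _update_add_ceph_state reach instance state such as self._ceph later in this change. A minimal illustration with a hypothetical class (not the sysinv code):

class Controller(object):
    def __init__(self, ceph_operator):
        self._ceph = ceph_operator

    # As a @staticmethod there is no 'self', so the monitor status check
    # used further down in this change would be unreachable.
    def _update_add_ceph_state(self):
        return self._ceph.get_monitors_status()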
@@ -4619,19 +4618,49 @@ class HostController(rest.RestController):
                     raise wsme.exc.ClientSideError(
                         _("Restore Ceph config failed: %s" % e))
         elif utils.is_aio_system(pecan.request.dbapi):
-            # TODO(wz): Need more work to restore ceph for AIO
-            LOG.info("For an AIO system, Restore crushmap...")
+            # For AIO, Ceph restore is done in ceph puppet
+            if not os.path.isfile(tsc.RESTORE_IN_PROGRESS_FLAG):
+                api.storage_backend_update(backend.uuid, {'task': None})
+        else:
+            # This is ceph restore for 2+2.
+
+            # If config_controller restore is still in progress, we wait.
+            if os.path.isfile(tsc.RESTORE_IN_PROGRESS_FLAG):
+                LOG.info("Restore flag is still on. Do nothing now. ")
+                return
+
+            active_mons, required_mons, __ = \
+                self._ceph.get_monitors_status(pecan.request.dbapi)
+            if required_mons > active_mons:
+                LOG.info("Not enough monitors yet available to fix crushmap.")
+            else:
+                LOG.info("Restore Ceph config ...")
+                # First restore ceph config
                 try:
                     if not pecan.request.rpcapi.restore_ceph_config(
-                            pecan.request.context, after_storage_enabled=True):
+                            pecan.request.context):
                         raise Exception("restore_ceph_config returned false")
                 except Exception as e:
                     raise wsme.exc.ClientSideError(
                         _("Restore Ceph config failed: %s" % e))

-        else:
-            # TODO(wz): Need more work to restore ceph for 2+2
-            pass
+                # Set Ceph backend task to None
+                api.storage_backend_update(backend.uuid, {'task': None})
+
+                # Apply runtime manifests for OSDs on two controller nodes.
+                c_hosts = api.ihost_get_by_personality(
+                    constants.CONTROLLER
+                )
+
+                runtime_manifests = True
+                for c_host in c_hosts:
+                    istors = pecan.request.dbapi.istor_get_by_ihost(c_host.uuid)
+                    for stor in istors:
+                        pecan.request.rpcapi.update_ceph_osd_config(
+                            pecan.request.context,
+                            c_host,
+                            stor.uuid,
+                            runtime_manifests)

     @staticmethod
     def update_ihost_action(action, hostupdate):
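For the standard (2+2) path, the new flow waits until the config_controller restore flag is gone and enough monitors are up, then restores the Ceph config and re-applies the OSD runtime manifests on both controllers. A condensed, hedged sketch of that gating (flag path and callables are placeholders):

import os

RESTORE_IN_PROGRESS_FLAG = '/etc/platform/.restore_in_progress'  # placeholder

def maybe_restore_ceph(active_mons, required_mons,
                       restore_config, apply_osd_manifests):
    """Sketch: defer while the restore flag exists or monitors are missing;
    otherwise restore the Ceph config and reapply the OSD manifests."""
    if os.path.isfile(RESTORE_IN_PROGRESS_FLAG):
        return  # config_controller restore still in progress, try again later
    if required_mons > active_mons:
        return  # not enough monitors yet to touch the crushmap
    restore_config()         # e.g. the conductor's restore_ceph_config RPC
    apply_osd_manifests()    # runtime manifests for OSDs on both controllers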
@@ -725,11 +725,12 @@ def fix_crushmap(dbapi=None):
        LOG.info("Not enough monitors yet available to fix crushmap.")
        return False

-    # Crushmap may be already loaded thorough puppet, avoid doing it twice.
+    # Crushmap may be already loaded through puppet, avoid doing it twice.
     default_ceph_tier_name = constants.SB_TIER_DEFAULT_NAMES[
         constants.SB_TIER_TYPE_CEPH] + constants.CEPH_CRUSH_TIER_SUFFIX
     rule_is_present, __, __ = _operator._crush_rule_status(default_ceph_tier_name)
     if rule_is_present:
+        LOG.info("Crushmap is already loaded through puppet.")
         _create_crushmap_flag_file()
         return False

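fix_crushmap() now detects the case where puppet already loaded the crushmap: when the default tier's crush rule is present it only records the flag file and returns. A small sketch of that early exit, with placeholder callables:

def fix_crushmap_sketch(rule_is_present, create_flag_file, load_crushmap):
    """Sketch of the early exit: skip loading when puppet already did it."""
    if rule_is_present:
        create_flag_file()   # remember that the crushmap is in place
        return False         # nothing left to do here
    load_crushmap()
    return True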
@@ -371,6 +371,7 @@ class CephOperator(object):
             except IOError as e:
                 LOG.warn(_('Failed to create flag file: {}. '
                            'Reason: {}').format(crushmap_flag_file, e))
+            LOG.info("Ceph crushmap is set.")
         except OSError as e:
             LOG.warn(_('Failed to restore Ceph crush map. '
                        'Reason: {}').format(e))
@@ -390,7 +391,7 @@ class CephOperator(object):
         if int(osd_stats['num_osds']) > 0:
             return True

-        LOG.info("osdmap is empty, restoring Ceph config...")
+        LOG.info("osdmap is empty, creating osds...")
         return self.rebuild_osdmap()

     # TODO(CephPoolsDecouple): remove
@@ -157,7 +157,15 @@ class CephPuppet(openstack.OpenstackBasePuppet):

     def get_host_config(self, host):
         config = {}
-        if host.personality in [constants.CONTROLLER, constants.STORAGE]:
+        backend = StorageBackendConfig.get_configured_backend(
+            self.dbapi,
+            constants.CINDER_BACKEND_CEPH)
+        # Do not write osd_config in controller hieradata
+        # during restore
+        if host.personality == constants.STORAGE:
+            config.update(self._get_ceph_osd_config(host))
+        elif (host.personality == constants.CONTROLLER and
+              backend.task != constants.SB_TASK_RESTORE):
             config.update(self._get_ceph_osd_config(host))
         config.update(self._get_ceph_mon_config(host))

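With this hieradata change, storage hosts always receive osd_config, while controllers receive it only when the Ceph backend is not in the restore task, so a restored controller-0 manifest cannot try to create OSDs while Ceph is down. A hedged sketch of the decision (string values are placeholders):

def include_osd_config(personality, backend_task,
                       controller='controller', storage='storage',
                       restore_task='restore'):
    """Sketch: write osd_config for storage hosts unconditionally, and for
    controllers only while the backend is not being restored."""
    if personality == storage:
        return True
    return personality == controller and backend_task != restore_task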