utilities/ceph/ceph-manager/ceph-manager/ceph_manager/server.py

# vim: tabstop=4 shiftwidth=4 softtabstop=4
#
# Copyright (c) 2016-2018 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# https://chrigl.de/posts/2014/08/27/oslo-messaging-example.html
# http://docs.openstack.org/developer/oslo.messaging/server.html
import sys
# noinspection PyUnresolvedReferences
import eventlet
# noinspection PyUnresolvedReferences
import oslo_messaging as messaging
# noinspection PyUnresolvedReferences
from fm_api import fm_api
# noinspection PyUnresolvedReferences
from oslo_config import cfg
# noinspection PyUnresolvedReferences
from oslo_log import log as logging
# noinspection PyUnresolvedReferences
from oslo_service import service
# noinspection PyUnresolvedReferences
from oslo_service.periodic_task import PeriodicTasks
# noinspection PyUnresolvedReferences
from oslo_service import loopingcall
from sysinv.conductor.cache_tiering_service_config import ServiceConfig
# noinspection PyUnresolvedReferences
from cephclient import wrapper
from monitor import Monitor
from cache_tiering import CacheTiering
import exception
import constants
from i18n import _LI, _LW
from retrying import retry

eventlet.monkey_patch(all=True)

CONF = cfg.CONF
CONF.register_opts([
    cfg.StrOpt('sysinv_api_bind_ip',
               default='0.0.0.0',
               help='IP for the Ceph Manager server to bind to')])
CONF.logging_default_format_string = (
    '%(asctime)s.%(msecs)03d %(process)d '
    '%(levelname)s %(name)s [-] %(message)s')
logging.register_options(CONF)
logging.setup(CONF, __name__)
LOG = logging.getLogger(__name__)
CONF.rpc_backend = 'rabbit'
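# RPC endpoint served on the ceph-manager topic; its public methods are
# invoked remotely via oslo.messaging (e.g. by the sysinv conductor) to
# drive cache tiering operations and to query cluster and tier status.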
class RpcEndpoint(PeriodicTasks):

    def __init__(self, service=None):
        self.service = service

    def cache_tiering_enable_cache(self, _, new_config, applied_config):
        LOG.info(_LI("Enabling cache"))
        try:
            self.service.cache_tiering.enable_cache(
                new_config, applied_config)
        except exception.CephManagerException as e:
            self.service.sysinv_conductor.call(
                {}, 'cache_tiering_enable_cache_complete',
                success=False, exception=str(e.message),
                new_config=new_config, applied_config=applied_config)

    def cache_tiering_disable_cache(self, _, new_config, applied_config):
        LOG.info(_LI("Disabling cache"))
        try:
            self.service.cache_tiering.disable_cache(
                new_config, applied_config)
        except exception.CephManagerException as e:
            self.service.sysinv_conductor.call(
                {}, 'cache_tiering_disable_cache_complete',
                success=False, exception=str(e.message),
                new_config=new_config, applied_config=applied_config)

    def cache_tiering_operation_in_progress(self, _):
        is_locked = self.service.cache_tiering.is_locked()
        LOG.info(_LI("Cache tiering operation "
                     "is in progress: %s") % str(is_locked).lower())
        return is_locked

    def get_primary_tier_size(self, _):
        """Get the ceph size for the primary tier.

        returns: an int for the size (in GB) of the tier
        """
        tiers_size = self.service.monitor.tiers_size
        primary_tier_size = tiers_size.get(
            self.service.monitor.primary_tier_name, 0)
        LOG.debug(_LI("Ceph cluster primary tier size: %s GB") %
                  str(primary_tier_size))
        return primary_tier_size

    def get_tiers_size(self, _):
        """Get the ceph cluster tier sizes.

        returns: a dict of sizes (in GB) by tier name
        """
        tiers_size = self.service.monitor.tiers_size
        LOG.debug(_LI("Ceph cluster tiers (size in GB): %s") %
                  str(tiers_size))
        return tiers_size

    def is_cluster_up(self, _):
        """Report if the last health check was successful.

        This is an independent view of the cluster accessibility that can
        be used by the sysinv conductor to gate ceph API calls which would
        timeout and potentially block other operations.

        This view is only updated at the rate the monitor checks for a
        cluster uuid or a health check (CEPH_HEALTH_CHECK_INTERVAL).

        returns: boolean True if last health check was successful else False
        """
        return self.service.monitor.cluster_is_up
# This class is needed only when upgrading from 16.10 to 17.x
# TODO: remove it after 1st 17.x release
#
class SysinvConductorUpgradeApi(object):

    def __init__(self):
        self.sysinv_conductor = None
        super(SysinvConductorUpgradeApi, self).__init__()

    def get_software_upgrade_status(self):
        LOG.info(_LI("Getting software upgrade status from sysinv"))
        cctxt = self.sysinv_conductor.prepare(timeout=2)
        upgrade = cctxt.call({}, 'get_software_upgrade_status')
        LOG.info(_LI("Software upgrade status: %s") % str(upgrade))
        return upgrade

    # retry_on_exception logs the failure and returns True, so the call is
    # retried indefinitely (once per second) until it succeeds.
    @retry(wait_fixed=1000,
           retry_on_exception=lambda exception:
               LOG.warn(_LW(
                   "Getting software upgrade status failed "
                   "with: %s. Retrying... ") % str(exception)) or True)
    def retry_get_software_upgrade_status(self):
        return self.get_software_upgrade_status()
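# Main oslo.service Service: wires up the RPC transport, the sysinv
# conductor client, the Ceph REST API wrapper, the cluster monitor and the
# cache tiering logic, plus a periodic task that refreshes target_max_bytes.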
class Service(SysinvConductorUpgradeApi, service.Service):

    def __init__(self, conf):
        super(Service, self).__init__()
        self.conf = conf
        self.rpc_server = None
        self.sysinv_conductor = None
        self.ceph_api = None
        self.entity_instance_id = ''
        self.fm_api = fm_api.FaultAPIs()
        self.monitor = Monitor(self)
        self.cache_tiering = CacheTiering(self)
        self.config = None
        self.config_desired = None
        self.config_applied = None

    def start(self):
        super(Service, self).start()
        transport = messaging.get_transport(self.conf)
        self.sysinv_conductor = messaging.RPCClient(
            transport,
            messaging.Target(
                topic=constants.SYSINV_CONDUCTOR_TOPIC))

        self.ceph_api = wrapper.CephWrapper(
            endpoint='http://localhost:5001/api/v0.1/')

        # Get initial config from sysinv and send it to
        # services that need it before starting them
        config = self.get_caching_tier_config()
        self.monitor.setup(config)

        self.rpc_server = messaging.get_rpc_server(
            transport,
            messaging.Target(topic=constants.CEPH_MANAGER_TOPIC,
                             server=self.conf.sysinv_api_bind_ip),
            [RpcEndpoint(self)],
            executor='eventlet')
        self.rpc_server.start()

        self.cache_tiering.set_initial_config(config)
        eventlet.spawn_n(self.monitor.run)

        # Refresh Ceph target_max_bytes every 300 seconds
        periodic = loopingcall.FixedIntervalLoopingCall(
            self.update_ceph_target_max_bytes)
        periodic.start(interval=300)

    def get_caching_tier_config(self):
        LOG.info("Getting cache tiering configuration from sysinv")
        while True:
            # Get initial configuration from sysinv,
            # retry until sysinv starts
            try:
                cctxt = self.sysinv_conductor.prepare(timeout=2)
                config = cctxt.call({}, 'cache_tiering_get_config')
                for section in config:
                    if section == constants.CACHE_TIERING:
                        self.config = ServiceConfig().from_dict(
                            config[section])
                    elif section == constants.CACHE_TIERING_DESIRED:
                        self.config_desired = ServiceConfig().from_dict(
                            config[section])
                    elif section == constants.CACHE_TIERING_APPLIED:
                        self.config_applied = ServiceConfig().from_dict(
                            config[section])
                LOG.info("Cache tiering configs: {}".format(config))
                return config
            except Exception as ex:
                # In production we should retry on every error until
                # the connection is reestablished.
                LOG.warn("Getting cache tiering configuration failed "
                         "with: {}. Retrying... ".format(str(ex)))

    def stop(self):
        try:
            self.rpc_server.stop()
            self.rpc_server.wait()
        except Exception:
            pass
        super(Service, self).stop()

    def update_ceph_target_max_bytes(self):
        try:
            self.cache_tiering.update_cache_target_max_bytes()
        except Exception as ex:
            LOG.exception("Updating Ceph target max bytes failed "
                          "with: {} retrying on next cycle.".format(str(ex)))
def run_service():
    CONF(sys.argv[1:])
    logging.setup(CONF, "ceph-manager")
    launcher = service.launch(CONF, Service(CONF), workers=1)
    launcher.wait()


if __name__ == "__main__":
    run_service()