# vim: tabstop=4 shiftwidth=4 softtabstop=4
#
# Copyright (c) 2016-2018 Wind River Systems, Inc.
#
# SPDX-License-Identifier: Apache-2.0
#
# https://chrigl.de/posts/2014/08/27/oslo-messaging-example.html
# http://docs.openstack.org/developer/oslo.messaging/server.html

import sys

# noinspection PyUnresolvedReferences
import eventlet
# noinspection PyUnresolvedReferences
import oslo_messaging as messaging
# noinspection PyUnresolvedReferences
from fm_api import fm_api
# noinspection PyUnresolvedReferences
from oslo_config import cfg
# noinspection PyUnresolvedReferences
from oslo_log import log as logging
# noinspection PyUnresolvedReferences
from oslo_service import service
# noinspection PyUnresolvedReferences
from oslo_service.periodic_task import PeriodicTasks
# noinspection PyUnresolvedReferences
from oslo_service import loopingcall

from sysinv.conductor.cache_tiering_service_config import ServiceConfig

# noinspection PyUnresolvedReferences
from cephclient import wrapper

from monitor import Monitor
from cache_tiering import CacheTiering

import exception
import constants

from i18n import _LI, _LW
from retrying import retry

eventlet.monkey_patch(all=True)

CONF = cfg.CONF
CONF.register_opts([
    cfg.StrOpt('sysinv_api_bind_ip',
               default='0.0.0.0',
               help='IP for the Ceph Manager server to bind to')])
CONF.logging_default_format_string = (
    '%(asctime)s.%(msecs)03d %(process)d '
    '%(levelname)s %(name)s [-] %(message)s')
logging.register_options(CONF)
logging.setup(CONF, __name__)
LOG = logging.getLogger(__name__)
CONF.rpc_backend = 'rabbit'


class RpcEndpoint(PeriodicTasks):

    def __init__(self, service=None):
        self.service = service

    def cache_tiering_enable_cache(self, _, new_config, applied_config):
        LOG.info(_LI("Enabling cache"))
        try:
            self.service.cache_tiering.enable_cache(
                new_config, applied_config)
        except exception.CephManagerException as e:
            self.service.sysinv_conductor.call(
                {}, 'cache_tiering_enable_cache_complete',
                success=False, exception=str(e.message),
                new_config=new_config, applied_config=applied_config)

    def cache_tiering_disable_cache(self, _, new_config, applied_config):
        LOG.info(_LI("Disabling cache"))
        try:
            self.service.cache_tiering.disable_cache(
                new_config, applied_config)
        except exception.CephManagerException as e:
            self.service.sysinv_conductor.call(
                {}, 'cache_tiering_disable_cache_complete',
                success=False, exception=str(e.message),
                new_config=new_config, applied_config=applied_config)

    def cache_tiering_operation_in_progress(self, _):
        is_locked = self.service.cache_tiering.is_locked()
        LOG.info(_LI("Cache tiering operation "
                     "is in progress: %s") % str(is_locked).lower())
        return is_locked

    def get_primary_tier_size(self, _):
        """Get the ceph size for the primary tier.

        returns: an int for the size (in GB) of the tier
        """
        tiers_size = self.service.monitor.tiers_size
        primary_tier_size = tiers_size.get(
            self.service.monitor.primary_tier_name, 0)
        LOG.debug(_LI("Ceph cluster primary tier size: %s GB") %
                  str(primary_tier_size))
        return primary_tier_size

    def get_tiers_size(self, _):
        """Get the ceph cluster tier sizes.

        returns: a dict of sizes (in GB) by tier name
        """
        tiers_size = self.service.monitor.tiers_size
        LOG.debug(_LI("Ceph cluster tiers (size in GB): %s") %
                  str(tiers_size))
        return tiers_size

    def is_cluster_up(self, _):
        """Report if the last health check was successful.

        This is an independent view of the cluster accessibility that
        can be used by the sysinv conductor to gate ceph API calls which
        would otherwise time out and potentially block other operations.

        This view is only updated at the rate the monitor checks for a
        cluster uuid or a health check (CEPH_HEALTH_CHECK_INTERVAL).

        returns: boolean True if the last health check was successful,
                 else False
        """
        return self.service.monitor.cluster_is_up

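
# Illustrative sketch only (not called anywhere in this service): how a peer
# process could reach the RpcEndpoint methods above through oslo.messaging.
# The topic matches the one the server is registered under in Service.start()
# below; the helper name and the reuse of the module-level CONF are assumptions
# made for the example.
def _example_get_tiers_size(conf=CONF):
    """Hypothetical client-side call against a running ceph-manager."""
    transport = messaging.get_transport(conf)
    client = messaging.RPCClient(
        transport,
        messaging.Target(topic=constants.CEPH_MANAGER_TOPIC))
    # Served by RpcEndpoint.get_tiers_size(); the empty dict stands in for the
    # request context, matching how calls are made elsewhere in this module.
    return client.call({}, 'get_tiers_size')
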

# This class is needed only when upgrading from 16.10 to 17.x
# TODO: remove it after 1st 17.x release
#
class SysinvConductorUpgradeApi(object):
    def __init__(self):
        self.sysinv_conductor = None
        super(SysinvConductorUpgradeApi, self).__init__()

    def get_software_upgrade_status(self):
        LOG.info(_LI("Getting software upgrade status from sysinv"))
        cctxt = self.sysinv_conductor.prepare(timeout=2)
        upgrade = cctxt.call({}, 'get_software_upgrade_status')
        LOG.info(_LI("Software upgrade status: %s") % str(upgrade))
        return upgrade

    @retry(wait_fixed=1000,
           retry_on_exception=lambda exception:
               LOG.warn(_LW(
                   "Getting software upgrade status failed "
                   "with: %s. Retrying... ") % str(exception)) or True)
    def retry_get_software_upgrade_status(self):
        return self.get_software_upgrade_status()


class Service(SysinvConductorUpgradeApi, service.Service):

    def __init__(self, conf):
        super(Service, self).__init__()
        self.conf = conf
        self.rpc_server = None
        self.sysinv_conductor = None
        self.ceph_api = None
        self.entity_instance_id = ''
        self.fm_api = fm_api.FaultAPIs()
        self.monitor = Monitor(self)
        self.cache_tiering = CacheTiering(self)
        self.config = None
        self.config_desired = None
        self.config_applied = None

    def start(self):
        super(Service, self).start()
        transport = messaging.get_transport(self.conf)
        self.sysinv_conductor = messaging.RPCClient(
            transport,
            messaging.Target(
                topic=constants.SYSINV_CONDUCTOR_TOPIC))

        self.ceph_api = wrapper.CephWrapper(
            endpoint='http://localhost:5001/api/v0.1/')

        # Get initial config from sysinv and send it to
        # services that need it before starting them
        config = self.get_caching_tier_config()
        self.monitor.setup(config)
        self.rpc_server = messaging.get_rpc_server(
            transport,
            messaging.Target(topic=constants.CEPH_MANAGER_TOPIC,
                             server=self.conf.sysinv_api_bind_ip),
            [RpcEndpoint(self)],
            executor='eventlet')
        self.rpc_server.start()
        self.cache_tiering.set_initial_config(config)
        eventlet.spawn_n(self.monitor.run)
        periodic = loopingcall.FixedIntervalLoopingCall(
            self.update_ceph_target_max_bytes)
        periodic.start(interval=300)

    def get_caching_tier_config(self):
        LOG.info("Getting cache tiering configuration from sysinv")
        while True:
            # Get initial configuration from sysinv,
            # retry until sysinv starts
            try:
                cctxt = self.sysinv_conductor.prepare(timeout=2)
                config = cctxt.call({}, 'cache_tiering_get_config')
                for section in config:
                    if section == constants.CACHE_TIERING:
                        self.config = ServiceConfig().from_dict(
                            config[section])
                    elif section == constants.CACHE_TIERING_DESIRED:
                        self.config_desired = ServiceConfig().from_dict(
                            config[section])
                    elif section == constants.CACHE_TIERING_APPLIED:
                        self.config_applied = ServiceConfig().from_dict(
                            config[section])
                LOG.info("Cache tiering configs: {}".format(config))
                return config
            except Exception as ex:
                # In production we should retry on every error
                # until connection is reestablished.
                LOG.warn("Getting cache tiering configuration failed "
                         "with: {}. Retrying... ".format(str(ex)))

    def stop(self):
        try:
            self.rpc_server.stop()
            self.rpc_server.wait()
        except Exception:
            pass
        super(Service, self).stop()

    def update_ceph_target_max_bytes(self):
        try:
            self.cache_tiering.update_cache_target_max_bytes()
        except Exception as ex:
            LOG.exception("Updating Ceph target max bytes failed "
                          "with: {} retrying on next cycle.".format(str(ex)))


def run_service():
    CONF(sys.argv[1:])
    logging.setup(CONF, "ceph-manager")
    launcher = service.launch(CONF, Service(CONF), workers=1)
    launcher.wait()


if __name__ == "__main__":
    run_service()