f783d009ba
oslo.service replaces the loopingcall code from oslo-incubator so switch to the new oslo.service module. Change-Id: Iba8a86cc79f8bbda4b04c71568dd9a33f4ace6f6
624 lines
21 KiB
Python
624 lines
21 KiB
Python
# Copyright 2010 United States Government as represented by the
|
|
# Administrator of the National Aeronautics and Space Administration.
|
|
# Copyright 2011 Justin Santa Barbara
|
|
# All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License. You may obtain
|
|
# a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
|
# License for the specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
"""Generic Node base class for all workers that run on hosts."""
|
|
|
|
import errno
|
|
import inspect
|
|
import os
|
|
import random
|
|
import signal
|
|
import sys
|
|
import time
|
|
|
|
import eventlet
|
|
import greenlet
|
|
from oslo_config import cfg
|
|
from oslo_log import log
|
|
import oslo_messaging as messaging
|
|
from oslo_service import loopingcall
|
|
from oslo_utils import importutils
|
|
|
|
from manila import context
|
|
from manila import db
|
|
from manila import exception
|
|
from manila.i18n import _LE
|
|
from manila.i18n import _LI
|
|
from manila.i18n import _LW
|
|
from manila import rpc
|
|
from manila import version
|
|
from manila import wsgi
|
|
|
|
# Module-level logger used by every launcher and service class in this file.
LOG = log.getLogger(__name__)

# Options for heartbeat/periodic-task timing and for the address the share
# API listens on. They are registered on the global CONF object just below.
service_opts = [
    cfg.IntOpt(
        'report_interval',
        default=10,
        help='Seconds between nodes reporting state to datastore.'),
    cfg.IntOpt(
        'periodic_interval',
        default=60,
        help='Seconds between running periodic tasks.'),
    cfg.IntOpt(
        'periodic_fuzzy_delay',
        default=60,
        help='Range of seconds to randomly delay when starting the '
             'periodic task scheduler to reduce stampeding. '
             '(Disable by setting to 0)'),
    cfg.StrOpt(
        'osapi_share_listen',
        default="0.0.0.0",
        help='IP address for OpenStack Share API to listen on.'),
    cfg.IntOpt(
        'osapi_share_listen_port',
        default=8786,
        help='Port for OpenStack Share API to listen on.'),
]

CONF = cfg.CONF
CONF.register_opts(service_opts)
|
|
|
|
|
|
class SignalExit(SystemExit):
    """SystemExit variant that remembers which signal caused the exit."""

    def __init__(self, signo, exccode=1):
        """Store the signal number and set the exit code.

        :param signo: Number of the signal that triggered the exit.
        :param exccode: Exit code handed to SystemExit (defaults to 1).
        """
        self.signo = signo
        super(SignalExit, self).__init__(exccode)
|
|
|
|
|
|
class Launcher(object):
    """Run one or more services in green threads and wait for them."""

    def __init__(self):
        """Create a launcher that is not tracking any service yet.

        :returns: None

        """
        self._services = []

    @staticmethod
    def run_server(server):
        """Start the server, then block until it has finished.

        :param server: Server to run and wait for.
        :returns: None

        """
        server.start()
        server.wait()

    def launch_server(self, server):
        """Run the given server in a new green thread and track it.

        :param server: The server you would like to start.
        :returns: None

        """
        self._services.append(eventlet.spawn(self.run_server, server))

    def stop(self):
        """Kill the green thread of every service launched so far.

        :returns: None

        """
        for green_thread in self._services:
            green_thread.kill()

    def wait(self):
        """Block until all launched services have stopped.

        A SIGTERM handler is installed first; it turns the signal into a
        KeyboardInterrupt raised in the waiting code.

        :returns: None

        """
        def _raise_interrupt(signum, frame):
            LOG.info(_LI("SIGTERM received"))
            # NOTE(jk0): Raise a ^C which is caught by the caller and cleanly
            # shuts down the service. This does not yet handle eventlet
            # threads.
            raise KeyboardInterrupt

        signal.signal(signal.SIGTERM, _raise_interrupt)

        for green_thread in self._services:
            try:
                green_thread.wait()
            except greenlet.GreenletExit:
                # GreenletExit is treated as a normal end of the service.
                pass
|
|
|
|
|
|
class ServerWrapper(object):
    """Bookkeeping record for one server and its forked worker processes."""

    def __init__(self, server, workers):
        """Remember the server and how many workers it should have.

        :param server: The server object the workers run.
        :param workers: Number of worker processes wanted for the server.
        """
        self.server, self.workers = server, workers
        # Pids of the children currently running this server.
        self.children = set()
        # Timestamps of recent forks, used to throttle respawning.
        self.forktimes = []
        # Set once a child exits with status 2 (unhandled exception).
        self.failed = False
|
|
|
|
|
|
class ProcessLauncher(object):
    """Fork worker processes for servers and respawn them when they exit.

    The parent keeps the write end of a pipe open; each child watches the
    read end so it can exit when the parent dies unexpectedly.
    """

    def __init__(self):
        """Set up bookkeeping, the parent-death pipe and signal handlers."""
        # Maps child pid -> ServerWrapper that owns the child.
        self.children = {}
        self.sigcaught = None
        # Number of servers launched / number that have been marked failed.
        self.totalwrap = 0
        self.failedwrap = 0
        self.running = True
        rfd, self.writepipe = os.pipe()
        self.readpipe = eventlet.greenio.GreenPipe(rfd, 'r')

        signal.signal(signal.SIGTERM, self._handle_signal)
        signal.signal(signal.SIGINT, self._handle_signal)

    def _handle_signal(self, signo, frame):
        """Record the signal and ask the main loop in wait() to stop.

        :param signo: Number of the received signal.
        :param frame: Current stack frame (unused).
        """
        self.sigcaught = signo
        self.running = False

        # Allow the process to be killed again and die from natural causes
        signal.signal(signal.SIGTERM, signal.SIG_DFL)
        signal.signal(signal.SIGINT, signal.SIG_DFL)

    def _pipe_watcher(self):
        """Exit the child process once the parent's pipe end is closed."""
        # This will block until the write end is closed when the parent
        # dies unexpectedly
        self.readpipe.read()

        LOG.info(_LI('Parent process has died unexpectedly, exiting'))

        sys.exit(1)

    def _child_process(self, server):
        """Prepare the freshly forked child and run the server in it.

        :param server: The server to start and wait for in this child.
        :raises SignalExit: from the signal handler on SIGTERM or SIGINT.
        """
        # Setup child signal handlers differently
        def _sigterm(signo, frame):
            signal.signal(signal.SIGTERM, signal.SIG_DFL)
            # Report the signal that was actually delivered, so that the
            # "Caught %s, exiting" log is correct for SIGINT as well.
            raise SignalExit(signo)

        signal.signal(signal.SIGTERM, _sigterm)
        # Block SIGINT and let the parent send us a SIGTERM
        # signal.signal(signal.SIGINT, signal.SIG_IGN)
        # This differs from the behavior in nova in that we dont ignore this
        # It allows the non-wsgi services to be terminated properly
        signal.signal(signal.SIGINT, _sigterm)

        # Reopen the eventlet hub to make sure we don't share an epoll
        # fd with parent and/or siblings, which would be bad
        eventlet.hubs.use_hub()

        # Close write to ensure only parent has it open
        os.close(self.writepipe)
        # Create greenthread to watch for parent to close pipe
        eventlet.spawn(self._pipe_watcher)

        # Reseed random number generator
        random.seed()

        launcher = Launcher()
        launcher.run_server(server)

    def _start_child(self, wrap):
        """Fork one child for the wrapped server.

        The child runs the server and always leaves through os._exit(), so
        only the parent returns from this method.

        :param wrap: ServerWrapper describing the server to run.
        :returns: pid of the new child (in the parent).
        """
        if len(wrap.forktimes) > wrap.workers:
            # Limit ourselves to one process a second (over the period of
            # number of workers * 1 second). This will allow workers to
            # start up quickly but ensure we don't fork off children that
            # die instantly too quickly.
            if time.time() - wrap.forktimes[0] < wrap.workers:
                LOG.info(_LI('Forking too fast, sleeping'))
                time.sleep(1)

            wrap.forktimes.pop(0)

        wrap.forktimes.append(time.time())

        pid = os.fork()
        if pid == 0:
            # NOTE(johannes): All exceptions are caught to ensure this
            # doesn't fallback into the loop spawning children. It would
            # be bad for a child to spawn more children.
            status = 0
            try:
                self._child_process(wrap.server)
            except SignalExit as exc:
                signame = {signal.SIGTERM: 'SIGTERM',
                           signal.SIGINT: 'SIGINT'}[exc.signo]
                LOG.info(_LI('Caught %s, exiting'), signame)
                status = exc.code
            except SystemExit as exc:
                status = exc.code
            except BaseException:
                LOG.exception(_LE('Unhandled exception'))
                # Status 2 makes the parent mark this server as failed.
                status = 2
            finally:
                wrap.server.stop()

            os._exit(status)

        LOG.info(_LI('Started child %d'), pid)

        wrap.children.add(pid)
        self.children[pid] = wrap

        return pid

    def _respawn_children(self, wrap):
        """Fork children until the wrapper has its wanted worker count.

        Stops early when the launcher is shutting down or the wrapper has
        been marked failed.
        """
        while (self.running and len(wrap.children) < wrap.workers
               and not wrap.failed):
            self._start_child(wrap)

    def launch_server(self, server, workers=1):
        """Fork the requested number of workers for the given server.

        :param server: The server each worker should run.
        :param workers: Number of worker processes to keep running.
        """
        wrap = ServerWrapper(server, workers)
        self.totalwrap += 1
        LOG.info(_LI('Starting %d workers'), wrap.workers)
        self._respawn_children(wrap)

    def _wait_child(self):
        """Reap one exited child without blocking.

        :returns: the ServerWrapper owning the reaped child, or None when
                  no child has exited, the wait was interrupted, there are
                  no children, or the pid is not tracked.
        :raises OSError: for waitpid errors other than EINTR and ECHILD.
        """
        try:
            # Don't block if no child processes have exited
            pid, status = os.waitpid(0, os.WNOHANG)
            if not pid:
                return None
        except OSError as exc:
            if exc.errno not in (errno.EINTR, errno.ECHILD):
                raise
            return None

        code = 0
        if os.WIFSIGNALED(status):
            sig = os.WTERMSIG(status)
            LOG.info(_LI('Child %(pid)d killed by signal %(sig)d'),
                     {'pid': pid, 'sig': sig})
        else:
            code = os.WEXITSTATUS(status)
            LOG.info(_LI('Child %(pid)d exited with status %(code)d'),
                     {'pid': pid, 'code': code})

        if pid not in self.children:
            LOG.warning(_LW('pid %d not in child list'), pid)
            return None

        wrap = self.children.pop(pid)
        wrap.children.remove(pid)
        if 2 == code:
            # The child died from an unhandled exception: do not respawn
            # this server, and stop entirely once every server has failed.
            wrap.failed = True
            self.failedwrap += 1
            LOG.info(_LI('_wait_child %d'), self.failedwrap)
            if self.failedwrap == self.totalwrap:
                self.running = False
        return wrap

    def wait(self):
        """Loop waiting on children to die and respawning as necessary."""
        while self.running:
            wrap = self._wait_child()
            if not wrap:
                # Yield to other threads if no children have exited
                # Sleep for a short time to avoid excessive CPU usage
                # (see bug #1095346)
                eventlet.greenthread.sleep(.01)
                continue

            LOG.info(_LI('wait wrap.failed %s'), wrap.failed)
            self._respawn_children(wrap)

        if self.sigcaught:
            signame = {signal.SIGTERM: 'SIGTERM',
                       signal.SIGINT: 'SIGINT'}[self.sigcaught]
            LOG.info(_LI('Caught %s, stopping children'), signame)

        for pid in self.children:
            try:
                os.kill(pid, signal.SIGTERM)
            except OSError as exc:
                # ESRCH: the child is already gone, nothing to signal.
                if exc.errno != errno.ESRCH:
                    raise

        # Wait for children to die
        if self.children:
            LOG.info(_LI('Waiting on %d children to exit'), len(self.children))
            while self.children:
                # _wait_child() never blocks, so sleep briefly when nothing
                # was reaped instead of spinning at full CPU.
                if not self._wait_child():
                    eventlet.greenthread.sleep(.01)
|
|
|
|
|
|
class Service(object):
    """Service object for binaries running on hosts.

    A service takes a manager and enables rpc by listening to queues based
    on topic. It also periodically runs tasks on the manager and reports
    its state to the database services table.
    """

    def __init__(self, host, binary, topic, manager, report_interval=None,
                 periodic_interval=None, periodic_fuzzy_delay=None,
                 service_name=None, *args, **kwargs):
        """Initialize rpc if needed and instantiate the manager.

        :param host: Host name the service runs on.
        :param binary: Name of the service binary.
        :param topic: RPC topic the service listens on.
        :param manager: Import path of the manager class.
        :param report_interval: Seconds between state reports; falsy
                                disables reporting.
        :param periodic_interval: Seconds between periodic task runs;
                                  falsy disables them.
        :param periodic_fuzzy_delay: Upper bound of the random initial
                                     delay for periodic tasks.
        :param service_name: Passed through to the manager constructor.
        """
        super(Service, self).__init__()
        if not rpc.initialized():
            rpc.init(CONF)
        self.host = host
        self.binary = binary
        self.topic = topic
        self.manager_class_name = manager
        manager_class = importutils.import_class(self.manager_class_name)
        self.manager = manager_class(host=self.host,
                                     service_name=service_name,
                                     *args, **kwargs)
        self.report_interval = report_interval
        self.periodic_interval = periodic_interval
        self.periodic_fuzzy_delay = periodic_fuzzy_delay
        self.saved_args, self.saved_kwargs = args, kwargs
        self.timers = []

    def start(self):
        """Register the service, start its RPC server and its timers."""
        version_string = version.version_string()
        LOG.info(_LI('Starting %(topic)s node (version %(version_string)s)'),
                 {'topic': self.topic, 'version_string': version_string})
        self.model_disconnected = False
        ctxt = context.get_admin_context()
        try:
            service_ref = db.service_get_by_args(ctxt,
                                                 self.host,
                                                 self.binary)
            self.service_id = service_ref['id']
        except exception.NotFound:
            self._create_service_ref(ctxt)

        LOG.debug("Creating RPC server for service %s.", self.topic)

        target = messaging.Target(topic=self.topic, server=self.host)
        endpoints = [self.manager]
        endpoints.extend(self.manager.additional_endpoints)
        self.rpcserver = rpc.get_server(target, endpoints)
        self.rpcserver.start()

        self.manager.init_host()
        if self.report_interval:
            pulse = loopingcall.FixedIntervalLoopingCall(self.report_state)
            pulse.start(interval=self.report_interval,
                        initial_delay=self.report_interval)
            self.timers.append(pulse)

        if self.periodic_interval:
            if self.periodic_fuzzy_delay:
                # Random start offset so services do not all run their
                # periodic tasks at the same moment.
                initial_delay = random.randint(0, self.periodic_fuzzy_delay)
            else:
                initial_delay = None

            periodic = loopingcall.FixedIntervalLoopingCall(
                self.periodic_tasks)
            periodic.start(interval=self.periodic_interval,
                           initial_delay=initial_delay)
            self.timers.append(periodic)

    def _create_service_ref(self, context):
        """Create the service row in the datastore and remember its id.

        :param context: Request context used for the database call.
        """
        zone = CONF.storage_availability_zone
        service_ref = db.service_create(context,
                                        {'host': self.host,
                                         'binary': self.binary,
                                         'topic': self.topic,
                                         'report_count': 0,
                                         'availability_zone': zone})
        self.service_id = service_ref['id']

    def __getattr__(self, key):
        """Delegate unknown attribute lookups to the manager."""
        # Read through __dict__ so a missing 'manager' attribute cannot
        # re-enter __getattr__.
        manager = self.__dict__.get('manager', None)
        return getattr(manager, key)

    @classmethod
    def create(cls, host=None, binary=None, topic=None, manager=None,
               report_interval=None, periodic_interval=None,
               periodic_fuzzy_delay=None, service_name=None):
        """Instantiates class and passes back application object.

        :param host: defaults to CONF.host
        :param binary: defaults to basename of executable
        :param topic: defaults to bin_name - 'manila-' part
        :param manager: defaults to CONF.<topic>_manager
        :param report_interval: defaults to CONF.report_interval
        :param periodic_interval: defaults to CONF.periodic_interval
        :param periodic_fuzzy_delay: defaults to CONF.periodic_fuzzy_delay
        :param service_name: passed unchanged to the constructor
        :returns: the new service object

        """
        if not host:
            host = CONF.host
        if not binary:
            # File name of the outermost stack frame, i.e. the script run.
            binary = os.path.basename(inspect.stack()[-1][1])
        if not topic:
            topic = binary
        if not manager:
            subtopic = topic.rpartition('manila-')[2]
            manager = CONF.get('%s_manager' % subtopic, None)
        if report_interval is None:
            report_interval = CONF.report_interval
        if periodic_interval is None:
            periodic_interval = CONF.periodic_interval
        if periodic_fuzzy_delay is None:
            periodic_fuzzy_delay = CONF.periodic_fuzzy_delay
        service_obj = cls(host, binary, topic, manager,
                          report_interval=report_interval,
                          periodic_interval=periodic_interval,
                          periodic_fuzzy_delay=periodic_fuzzy_delay,
                          service_name=service_name)

        return service_obj

    def kill(self):
        """Destroy the service object in the datastore."""
        self.stop()
        try:
            db.service_destroy(context.get_admin_context(), self.service_id)
        except exception.NotFound:
            # LOG.warning, not the deprecated LOG.warn alias.
            LOG.warning(_LW('Service killed that has no database entry'))

    def stop(self):
        """Stop the RPC server and all timers, ignoring any errors."""
        # Try to shut the connection down, but if we get any sort of
        # errors, go ahead and ignore them.. as we're shutting down anyway
        try:
            self.rpcserver.stop()
        except Exception:
            pass
        for x in self.timers:
            try:
                x.stop()
            except Exception:
                pass
        self.timers = []

    def wait(self):
        """Wait on every timer, ignoring any errors they raise."""
        for x in self.timers:
            try:
                x.wait()
            except Exception:
                pass

    def periodic_tasks(self, raise_on_error=False):
        """Tasks to be run at a periodic interval."""
        ctxt = context.get_admin_context()
        self.manager.periodic_tasks(ctxt, raise_on_error=raise_on_error)

    def report_state(self):
        """Update the state of this service in the datastore."""
        ctxt = context.get_admin_context()
        zone = CONF.storage_availability_zone
        state_catalog = {}
        try:
            try:
                service_ref = db.service_get(ctxt, self.service_id)
            except exception.NotFound:
                LOG.debug('The service database object disappeared, '
                          'Recreating it.')
                self._create_service_ref(ctxt)
                service_ref = db.service_get(ctxt, self.service_id)

            state_catalog['report_count'] = service_ref['report_count'] + 1
            if zone != service_ref['availability_zone']:
                state_catalog['availability_zone'] = zone

            db.service_update(ctxt,
                              self.service_id, state_catalog)

            # TODO(termie): make this pattern be more elegant.
            if getattr(self, 'model_disconnected', False):
                self.model_disconnected = False
                LOG.error(_LE('Recovered model server connection!'))

        # TODO(vish): this should probably only catch connection errors
        except Exception:  # pylint: disable=W0702
            # Log only on the first failure; the flag is cleared again on
            # the next successful report.
            if not getattr(self, 'model_disconnected', False):
                self.model_disconnected = True
                LOG.exception(_LE('model server went away'))
|
|
|
|
|
|
class WSGIService(object):
    """Launches an API application loaded from a 'paste' configuration."""

    def __init__(self, name, loader=None):
        """Build the WSGI server for the named app without starting it.

        :param name: The name of the WSGI server given to the loader.
        :param loader: Loads the WSGI application using the given name.
        :returns: None

        """
        self.name = name
        self.manager = self._get_manager()
        self.loader = loader or wsgi.Loader()
        if not rpc.initialized():
            rpc.init(CONF)
        self.app = self.loader.load_app(name)
        # Listen address and port come from the <name>_listen and
        # <name>_listen_port options, with fallbacks when they are absent.
        listen_host = getattr(CONF, '%s_listen' % name, "0.0.0.0")
        listen_port = getattr(CONF, '%s_listen_port' % name, 0)
        self.host = listen_host
        self.port = listen_port
        self.server = wsgi.Server(name,
                                  self.app,
                                  host=listen_host,
                                  port=listen_port)

    def _get_manager(self):
        """Create the Manager configured for this service, if any.

        The option '<name>_manager' is looked up in the configuration. If
        the option does not exist or is empty, None is returned; otherwise
        the named class is imported and instantiated without arguments.

        :returns: a Manager instance, or None.

        """
        opt_name = '%s_manager' % self.name
        class_path = CONF.get(opt_name, None) if opt_name in CONF else None
        if class_path:
            return importutils.import_class(class_path)()
        return None

    def start(self):
        """Start serving this service using loaded configuration.

        Also, retrieve updated port number in case '0' was passed in, which
        indicates a random port should be used.

        :returns: None

        """
        manager = self.manager
        if manager:
            manager.init_host()
        server = self.server
        server.start()
        self.port = server.port

    def stop(self):
        """Stop the underlying WSGI server.

        :returns: None

        """
        self.server.stop()

    def wait(self):
        """Block until the underlying WSGI server has stopped.

        :returns: None

        """
        self.server.wait()
|
|
|
|
|
|
# NOTE(vish): the global launcher is to maintain the existing
#             functionality of calling service.serve +
#             service.wait
# It stays None until serve() creates the Launcher; wait() then uses it.
_launcher = None
|
|
|
|
|
|
def serve(*servers):
    """Launch each given server on the shared module-level launcher.

    The launcher is created on first use and reused by later calls.

    :param servers: Server objects to launch.
    """
    global _launcher
    launcher = _launcher or Launcher()
    _launcher = launcher
    for srv in servers:
        launcher.launch_server(srv)
|
|
|
|
|
|
def wait():
    """Log the configuration, then block on the module-level launcher.

    Values of options that look like secrets are not written to the log.
    A KeyboardInterrupt raised while waiting stops the launcher. RPC
    resources are cleaned up before returning.
    """
    LOG.debug('Full set of CONF:')
    for flag in CONF:
        flag_get = CONF.get(flag, None)
        # hide flag contents from log if contains a password
        # should use secret flag when switch over to openstack-common
        # flag_get may be None (e.g. option unset), so check truthiness
        # before the substring test to avoid a TypeError.
        is_sensitive = (
            "_password" in flag or "_key" in flag or
            (flag == "sql_connection" and bool(flag_get) and
             "mysql:" in flag_get))
        if is_sensitive:
            LOG.debug('%(flag)s : FLAG SET ', {"flag": flag})
        else:
            LOG.debug('%(flag)s : %(flag_get)s',
                      {"flag": flag, "flag_get": flag_get})
    try:
        _launcher.wait()
    except KeyboardInterrupt:
        _launcher.stop()
    rpc.cleanup()
|