neutron/neutron/service.py
Bernard Cafarelli 13aa00026f
Re-use existing ProcessLauncher from wsgi in RPC workers
If both are run under the same process, and api_workers >= 2, the server
process will instantiate two oslo_service.ProcessLauncher instances

This should be avoided [0], and indeed causes issues on subprocess and
signal handling: killed RPC workers not respawning, SIGHUP on master
process leading to unresponsive server, signal not properly sent to all
child processes, ...

To avoid this, use the wsgi ProcessLauncher instance if it exists

[0] https://docs.openstack.org/oslo.service/latest/user/usage.html#launchers

Change-Id: Ic821f8ca84add9c8137ef712031afb43e491591c
Closes-Bug: #1780139
2020-02-07 14:51:06 +01:00

457 lines
15 KiB
Python

# Copyright 2011 VMware, Inc
# All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
import inspect
import os
import random
from neutron_lib.callbacks import events
from neutron_lib.callbacks import registry
from neutron_lib.callbacks import resources
from neutron_lib import context
from neutron_lib.db import api as session
from neutron_lib.plugins import directory
from neutron_lib import rpc as n_rpc
from oslo_concurrency import processutils
from oslo_config import cfg
from oslo_log import log as logging
from oslo_messaging import server as rpc_server
from oslo_service import loopingcall
from oslo_service import service as common_service
from oslo_utils import excutils
from oslo_utils import importutils
import psutil
from neutron.common import config
from neutron.common import profiler
from neutron.conf import service
from neutron import worker as neutron_worker
from neutron import wsgi
service.register_service_opts(service.SERVICE_OPTS)
service.register_service_opts(service.RPC_EXTRA_OPTS)
LOG = logging.getLogger(__name__)
class WsgiService(object):
    """Common base for services built around a WSGI application.

    For each api you define, you must also define these flags:

    :<api>_listen: The address on which to listen
    :<api>_listen_port: The port on which to listen
    """

    def __init__(self, app_name):
        # The server object stays unset until start() has been called.
        self.wsgi_app = None
        self.app_name = app_name

    def start(self):
        """Run the WSGI application named ``app_name`` and keep the result."""
        self.wsgi_app = _run_wsgi(self.app_name)

    def wait(self):
        """Call ``wait()`` on the object stored by :meth:`start`."""
        server = self.wsgi_app
        server.wait()
class NeutronApiService(WsgiService):
    """WSGI service class for the neutron-api service."""

    def __init__(self, app_name):
        # The profiler is set up before the base class is initialised.
        profiler.setup('neutron-server', cfg.CONF.host)
        super(NeutronApiService, self).__init__(app_name)

    @classmethod
    def create(cls, app_name='neutron'):
        """Configure logging, then build and return an instance.

        :param app_name: name handed to the constructor
        :returns: a new instance of ``cls``
        """
        # Logging has to be configured before the instance is created.
        config.setup_logging()
        return cls(app_name)
def serve_wsgi(cls):
    """Create and start a service from ``cls`` and publish BEFORE_SPAWN.

    :param cls: class providing a ``create()`` factory whose result has a
                ``start()`` method
    :returns: the started service object
    :raises Exception: whatever ``create()`` or ``start()`` raised, after
                       it has been logged
    """
    try:
        wsgi_service = cls.create()
        wsgi_service.start()
    except Exception:
        # Log the failure, then let the original exception propagate.
        with excutils.save_and_reraise_exception():
            LOG.exception('Unrecoverable error: please check log '
                          'for details.')

    registry.publish(resources.PROCESS, events.BEFORE_SPAWN, wsgi_service)
    return wsgi_service
class RpcWorker(neutron_worker.NeutronBaseWorker):
    """Wraps a worker to be handled by ProcessLauncher"""

    # Name of the plugin method that is called to start the listeners.
    start_listeners_method = 'start_rpc_listeners'

    def __init__(self, plugins, worker_process_count=1):
        super(RpcWorker, self).__init__(
            worker_process_count=worker_process_count
        )
        # Servers returned by the plugins are collected by start().
        self._servers = []
        self._plugins = plugins

    def start(self):
        """Start the listeners of every plugin that provides them."""
        super(RpcWorker, self).start(desc="rpc worker")
        method_name = self.start_listeners_method
        for plugin in self._plugins:
            if not hasattr(plugin, method_name):
                continue
            try:
                started = getattr(plugin, method_name)()
            except NotImplementedError:
                # The plugin has the method but does not implement it.
                continue
            self._servers.extend(started)

    def wait(self):
        """Wait on the collected servers, logging any failure."""
        try:
            self._wait()
        except Exception:
            LOG.exception('done with wait')
            raise

    def _wait(self):
        LOG.debug('calling RpcWorker wait()')
        for server in self._servers:
            # Only oslo.messaging servers are waited on.
            if not isinstance(server, rpc_server.MessageHandlingServer):
                LOG.debug('NOT calling wait on %s', server)
                continue
            LOG.debug('calling wait on %s', server)
            server.wait()
        LOG.debug('returning from RpcWorker wait()')

    def stop(self):
        """Stop every collected server that is a MessageHandlingServer."""
        LOG.debug('calling RpcWorker stop()')
        for server in self._servers:
            if not isinstance(server, rpc_server.MessageHandlingServer):
                continue
            LOG.debug('calling stop on %s', server)
            server.stop()

    @staticmethod
    def reset():
        config.reset_service()
class RpcReportsWorker(RpcWorker):
    """RpcWorker variant that starts the state reports listener.

    Only the name of the plugin method called on start is overridden; all
    other behaviour is inherited from RpcWorker.
    """
    start_listeners_method = 'start_rpc_state_reports_listener'
def _get_worker_count():
    """Return a default worker count bounded by CPUs and system memory.

    The result is the smaller of ``processutils.get_worker_count()`` and
    the number of whole 2 GiB slices in total system memory, and it is
    never lower than one.

    NOTE(review): the previous comment spoke of using at most half the
    system memory, but the division below budgets 2 GiB per worker
    against the *total* memory - confirm which one is intended.
    """
    # Assumed steady-state memory footprint of one worker.
    bytes_per_worker = 2 * 1024 * 1024 * 1024

    cpu_bound = processutils.get_worker_count()
    memory_bound = int(psutil.virtual_memory().total / bytes_per_worker)

    # Take the tighter of the two limits, but always at least one worker.
    return max(1, min(cpu_bound, memory_bound))
def _get_rpc_workers(plugin=None):
    """Build the RPC worker objects for the given (or core) plugin.

    :param plugin: plugin to inspect; when None the result of
                   ``directory.get_plugin()`` is used
    :returns: a list holding one RpcWorker and, when state report workers
              are configured and supported, one RpcReportsWorker
    :raises NotImplementedError: if the plugin reports that RPC workers
                                 are not supported
    """
    core_plugin = directory.get_plugin() if plugin is None else plugin
    service_plugins = directory.get_plugins().values()

    worker_count = cfg.CONF.rpc_workers
    if worker_count is None:
        # By default, half as many rpc workers as api workers
        worker_count = int(_get_api_workers() / 2)
    # Never go below a single rpc worker.
    worker_count = max(worker_count, 1)

    # If worker_count > 0 then start_rpc_listeners would be called in a
    # subprocess and we cannot simply catch the NotImplementedError. It is
    # simpler to check this up front by testing whether the plugin supports
    # multiple RPC workers.
    if not core_plugin.rpc_workers_supported():
        LOG.debug("Active plugin doesn't implement start_rpc_listeners")
        if worker_count > 0:
            LOG.error("'rpc_workers = %d' ignored because "
                      "start_rpc_listeners is not implemented.",
                      worker_count)
        raise NotImplementedError()

    # passing service plugins only, because core plugin is among them
    rpc_workers = [
        RpcWorker(service_plugins, worker_process_count=worker_count)
    ]

    report_worker_count = cfg.CONF.rpc_state_report_workers
    if (report_worker_count > 0 and
            core_plugin.rpc_state_report_workers_supported()):
        reports_worker = RpcReportsWorker(
            [core_plugin], worker_process_count=report_worker_count
        )
        rpc_workers.append(reports_worker)

    return rpc_workers
def _get_plugins_workers():
    """Collect the workers of every unique plugin that has ``get_workers``.

    :returns: a flat list with the items returned by each plugin's
              ``get_workers()``, in plugin order
    """
    collected = []
    # NOTE(twilson) get_plugins also returns the core plugin
    # TODO(twilson) Instead of defaulting here, come up with a good way to
    # share a common get_workers default between NeutronPluginBaseV2 and
    # ServicePluginBase
    for plugin in directory.get_unique_plugins():
        if hasattr(plugin, 'get_workers'):
            collected.extend(plugin.get_workers())
    return collected
class AllServicesNeutronWorker(neutron_worker.NeutronBaseWorker):
    """Worker that runs a group of services through a single Launcher."""

    def __init__(self, services, worker_process_count=1):
        super(AllServicesNeutronWorker, self).__init__(worker_process_count)
        self._services = services
        self._launcher = common_service.Launcher(
            cfg.CONF, restart_method='mutate')

    def start(self):
        """Launch every wrapped service, then start this worker itself."""
        launch = self._launcher.launch_service
        for wrapped_service in self._services:
            launch(wrapped_service)
        super(AllServicesNeutronWorker, self).start(desc="services worker")

    def stop(self):
        """Stop the internal launcher."""
        self._launcher.stop()

    def wait(self):
        """Wait on the internal launcher."""
        self._launcher.wait()

    def reset(self):
        """Restart the internal launcher."""
        self._launcher.restart()
def _start_workers(workers, neutron_api=None):
    """Launch ``workers`` and return the launcher that runs them.

    When at least one worker has ``worker_process_count > 0`` a
    ProcessLauncher is used (the one exposed by the WSGI app of
    ``neutron_api`` if there is one); otherwise all workers are started
    through a ServiceLauncher.

    :param workers: worker objects exposing ``worker_process_count``
    :param neutron_api: optional API service whose
                        ``wsgi_app.process_launcher`` is re-used
    :returns: the launcher the workers were handed to
    :raises Exception: any error hit while launching, after logging it
    """
    forked_workers = [
        candidate for candidate in workers
        if candidate.worker_process_count > 0
    ]

    try:
        if not forked_workers:
            # Nothing needs its own process: start everything in this one.
            launcher = common_service.ServiceLauncher(cfg.CONF)
            for worker in workers:
                launcher.launch_service(worker)
            return launcher

        # Re-use the launcher the WSGI app may already own; create a new
        # one only when none is available.
        launcher = (neutron_api.wsgi_app.process_launcher
                    if neutron_api else None)
        if launcher is None:
            launcher = common_service.ProcessLauncher(
                cfg.CONF, wait_interval=1.0, restart_method='mutate'
            )

        # Workers asking for no process of their own are grouped and run
        # together inside one extra process worker.
        grouped_workers = [
            candidate for candidate in workers
            if candidate.worker_process_count < 1
        ]
        if grouped_workers:
            forked_workers.append(AllServicesNeutronWorker(grouped_workers))

        # dispose the whole pool before os.fork, otherwise there will
        # be shared DB connections in child processes which may cause
        # DB errors.
        session.get_context_manager().dispose_pool()

        for worker in forked_workers:
            launcher.launch_service(worker, worker.worker_process_count)
        return launcher
    except Exception:
        with excutils.save_and_reraise_exception():
            LOG.exception('Unrecoverable error: please check log for '
                          'details.')
def start_all_workers(neutron_api=None):
    """Start the RPC and plugin workers and publish AFTER_SPAWN.

    :param neutron_api: optional API service passed on to the launcher
                        selection
    :returns: the launcher running the workers
    """
    combined_workers = _get_rpc_workers() + _get_plugins_workers()
    worker_launcher = _start_workers(combined_workers, neutron_api)
    registry.publish(resources.PROCESS, events.AFTER_SPAWN, None)
    return worker_launcher
def start_rpc_workers():
    """Start only the RPC workers and publish AFTER_SPAWN.

    :returns: the launcher running the RPC workers
    """
    workers_to_start = _get_rpc_workers()
    LOG.debug('using launcher for rpc, workers=%s', cfg.CONF.rpc_workers)
    worker_launcher = _start_workers(workers_to_start)
    registry.publish(resources.PROCESS, events.AFTER_SPAWN, None)
    return worker_launcher
def start_plugins_workers():
    """Start only the workers provided by the plugins.

    :returns: the launcher running the plugin workers
    """
    return _start_workers(_get_plugins_workers())
def _get_api_workers():
    """Return the configured ``api_workers`` or the computed default."""
    configured = cfg.CONF.api_workers
    # An unset option falls back to the CPU/memory based default.
    return _get_worker_count() if configured is None else configured
def _run_wsgi(app_name):
    """Load the paste app ``app_name`` and run it.

    :returns: the result of ``run_wsgi_app``, or None when no app was
              loaded
    """
    app = config.load_paste_app(app_name)
    if app:
        return run_wsgi_app(app)
    LOG.error('No known API applications configured.')
    return None
def run_wsgi_app(app):
    """Start a ``wsgi.Server`` for ``app`` on the configured address.

    :param app: application object passed to the server's ``start()``
    :returns: the started server object
    """
    server = wsgi.Server("Neutron")
    bind_host = cfg.CONF.bind_host
    bind_port = cfg.CONF.bind_port
    server.start(app, bind_port, bind_host,
                 workers=_get_api_workers(), desc="api worker")
    LOG.info("Neutron service started, listening on %(host)s:%(port)s",
             {'host': bind_host, 'port': bind_port})
    return server
class Service(n_rpc.Service):
    """Service object for binaries running on hosts.

    The service owns a manager object, passes it to the RPC base service
    together with the topic, and can run the manager's periodic tasks and
    a state report on timers.
    """

    def __init__(self, host, binary, topic, manager, report_interval=None,
                 periodic_interval=None, periodic_fuzzy_delay=None,
                 *args, **kwargs):
        self.binary = binary
        # ``manager`` is a class path; it is imported and instantiated here.
        self.manager_class_name = manager
        manager_cls = importutils.import_class(self.manager_class_name)
        self.manager = manager_cls(host=host, *args, **kwargs)
        self.report_interval = report_interval
        self.periodic_interval = periodic_interval
        self.periodic_fuzzy_delay = periodic_fuzzy_delay
        self.saved_args = args
        self.saved_kwargs = kwargs
        self.timers = []
        profiler.setup(binary, host)
        super(Service, self).__init__(host, topic, manager=self.manager)

    def start(self):
        """Initialise the manager, start RPC and set up the timers."""
        self.manager.init_host()
        super(Service, self).start()

        if self.report_interval:
            report_timer = loopingcall.FixedIntervalLoopingCall(
                self.report_state)
            report_timer.start(interval=self.report_interval,
                               initial_delay=self.report_interval)
            self.timers.append(report_timer)

        if self.periodic_interval:
            # A random initial delay is picked when a fuzzy delay is set.
            initial_delay = (
                random.randint(0, self.periodic_fuzzy_delay)
                if self.periodic_fuzzy_delay else None
            )
            tasks_timer = loopingcall.FixedIntervalLoopingCall(
                self.periodic_tasks)
            tasks_timer.start(interval=self.periodic_interval,
                              initial_delay=initial_delay)
            self.timers.append(tasks_timer)

        self.manager.after_start()

    def __getattr__(self, key):
        # Attributes missing on the service are looked up on the manager.
        # ``__dict__`` is read directly so that a missing ``manager`` does
        # not call back into this method.
        delegate = self.__dict__.get('manager', None)
        return getattr(delegate, key)

    @classmethod
    def create(cls, host=None, binary=None, topic=None, manager=None,
               report_interval=None, periodic_interval=None,
               periodic_fuzzy_delay=None):
        """Instantiates class and passes back application object.

        :param host: defaults to cfg.CONF.host
        :param binary: defaults to basename of executable
        :param topic: defaults to bin_name - 'neutron-' part
        :param manager: defaults to cfg.CONF.<topic>_manager
        :param report_interval: defaults to cfg.CONF.report_interval
        :param periodic_interval: defaults to cfg.CONF.periodic_interval
        :param periodic_fuzzy_delay: defaults to cfg.CONF.periodic_fuzzy_delay
        """
        host = host or cfg.CONF.host
        # The outermost stack frame supplies the file name of the binary.
        binary = binary or os.path.basename(inspect.stack()[-1][1])
        if not topic:
            topic = binary.rpartition('neutron-')[2].replace("-", "_")
        manager = manager or cfg.CONF.get('%s_manager' % topic, None)

        # The numeric options are only defaulted when they are None.
        if report_interval is None:
            report_interval = cfg.CONF.report_interval
        if periodic_interval is None:
            periodic_interval = cfg.CONF.periodic_interval
        if periodic_fuzzy_delay is None:
            periodic_fuzzy_delay = cfg.CONF.periodic_fuzzy_delay

        return cls(host, binary, topic, manager,
                   report_interval=report_interval,
                   periodic_interval=periodic_interval,
                   periodic_fuzzy_delay=periodic_fuzzy_delay)

    def kill(self):
        """Destroy the service object."""
        self.stop()

    def stop(self):
        """Stop RPC, every timer and finally the manager."""
        super(Service, self).stop()
        for timer in self.timers:
            try:
                timer.stop()
            except Exception:
                LOG.exception("Exception occurs when timer stops")
        # NOTE(review): the list is emptied here, so a wait() issued after
        # stop() finds no timers left to wait on - confirm this is intended.
        self.timers = []
        self.manager.stop()

    def wait(self):
        """Wait for RPC and for every timer still registered."""
        super(Service, self).wait()
        for timer in self.timers:
            try:
                timer.wait()
            except Exception:
                LOG.exception("Exception occurs when waiting for timer")

    def reset(self):
        config.reset_service()

    def periodic_tasks(self, raise_on_error=False):
        """Tasks to be run at a periodic interval."""
        admin_context = context.get_admin_context()
        self.manager.periodic_tasks(admin_context,
                                    raise_on_error=raise_on_error)

    def report_state(self):
        """Update the state of this service."""
        # Todo(gongysh) report state to neutron server
        pass