blueprint <multi-process-api-service>

Add multiprocess support for API serivces (EC2/OSAPI_Compute/OSAPI_Volume/Metadata).

2012-06-1 v7:
    * Add unittest to cover worker recovery, service termination functionality
    in wsgi.py, fix python 2.6 compatibility issue.
    * Modify generate_uid() to introduce per-process seeds in utils.py to avoid
    collisions.
    * Add worker session to nova.conf.sample.
2012-05-21 v6:
    * Fix 'test_wsgi' unittest error.
2012-04-28 v5:
    * Add SIGINT handler and fix child-parent race condition when Ctrl+C is
    pressed.
2012-03-31 v4:
    * Fixed typo, removed debug code.
2012-03-30 v3:
    * Fixed localization/pep8 error in unittest, add metadata test.
    * nova/wsgi.py:Server: use the greenthread pool created for each process.
    * nova/service.py: remove debug code
2012-03-27 v2:
    * Fixed unittest error.
    * nova/wsgi.py:Server: Use self._logger to do logging in multiprocess mode.
    * nova/wsgi.py:Server: Move self._pool creation into proper place.
    * code style fix.
2012-03-25 v1:
    * Modification to nova/service.py and nova/wsgi.py in order to support
    multiprocess (a.k.a. workers) for various API services.  If multiprocess
    mode is enabled, (i.e. flags 'APINAME_workers' set to positive numbers),
    corresponding API service will run in target number of process(es). There
    is also a master_worker process spawned for managing all workers (handling
    signal/termination).
    * Add unittest for multiprocess API service, also alter testing/runner.py
    to adopt new unittest.

Change-Id: Ia045e595543ddfd192894b2a05801cc4b7ca90cb
This commit is contained in:
Zhiteng Huang
2012-03-25 02:06:01 +08:00
parent 846896e950
commit 2b39c106f2
10 changed files with 185 additions and 28 deletions

View File

@@ -213,6 +213,7 @@ Yun Mao <yunmao@gmail.com>
Yun Shen <Yun.Shen@hp.com> Yun Shen <Yun.Shen@hp.com>
Yuriy Taraday <yorik.sar@gmail.com> Yuriy Taraday <yorik.sar@gmail.com>
Zed Shaw <zedshaw@zedshaw.com> Zed Shaw <zedshaw@zedshaw.com>
Zhiteng Huang <zhiteng.huang@intel.com>
Zhixue Wu <Zhixue.Wu@citrix.com> Zhixue Wu <Zhixue.Wu@citrix.com>
Zhongyue Luo <lzyeval@gmail.com> Zhongyue Luo <lzyeval@gmail.com>
Ziad Sawalha <github@highbridgellc.com> Ziad Sawalha <github@highbridgellc.com>

View File

@@ -24,7 +24,7 @@ Starts both the EC2 and OpenStack APIs in separate greenthreads.
""" """
import eventlet import eventlet
eventlet.monkey_patch() eventlet.monkey_patch(os=False)
import os import os
import sys import sys

View File

@@ -20,7 +20,7 @@
"""Starter script for Nova EC2 API.""" """Starter script for Nova EC2 API."""
import eventlet import eventlet
eventlet.monkey_patch() eventlet.monkey_patch(os=False)
import os import os
import sys import sys

View File

@@ -20,7 +20,7 @@
"""Starter script for Nova Metadata API.""" """Starter script for Nova Metadata API."""
import eventlet import eventlet
eventlet.monkey_patch() eventlet.monkey_patch(os=False)
import os import os
import sys import sys

View File

@@ -20,7 +20,7 @@
"""Starter script for Nova OS API.""" """Starter script for Nova OS API."""
import eventlet import eventlet
eventlet.monkey_patch() eventlet.monkey_patch(os=False)
import os import os
import sys import sys

View File

@@ -20,7 +20,7 @@
"""Starter script for Nova OS API.""" """Starter script for Nova OS API."""
import eventlet import eventlet
eventlet.monkey_patch() eventlet.monkey_patch(os=False)
import os import os
import sys import sys

View File

@@ -61,12 +61,18 @@ service_opts = [
cfg.IntOpt('ec2_listen_port', cfg.IntOpt('ec2_listen_port',
default=8773, default=8773,
help='port for ec2 api to listen'), help='port for ec2 api to listen'),
cfg.IntOpt('ec2_workers',
default=0,
help='Number of workers for EC2 API service'),
cfg.StrOpt('osapi_compute_listen', cfg.StrOpt('osapi_compute_listen',
default="0.0.0.0", default="0.0.0.0",
help='IP address for OpenStack API to listen'), help='IP address for OpenStack API to listen'),
cfg.IntOpt('osapi_compute_listen_port', cfg.IntOpt('osapi_compute_listen_port',
default=8774, default=8774,
help='list port for osapi compute'), help='list port for osapi compute'),
cfg.IntOpt('osapi_compute_workers',
default=0,
help='Number of workers for OpenStack API service'),
cfg.StrOpt('metadata_manager', cfg.StrOpt('metadata_manager',
default='nova.api.manager.MetadataManager', default='nova.api.manager.MetadataManager',
help='OpenStack metadata service manager'), help='OpenStack metadata service manager'),
@@ -76,12 +82,18 @@ service_opts = [
cfg.IntOpt('metadata_listen_port', cfg.IntOpt('metadata_listen_port',
default=8775, default=8775,
help='port for metadata api to listen'), help='port for metadata api to listen'),
cfg.IntOpt('metadata_workers',
default=0,
help='Number of workers for metadata service'),
cfg.StrOpt('osapi_volume_listen', cfg.StrOpt('osapi_volume_listen',
default="0.0.0.0", default="0.0.0.0",
help='IP address for OpenStack Volume API to listen'), help='IP address for OpenStack Volume API to listen'),
cfg.IntOpt('osapi_volume_listen_port', cfg.IntOpt('osapi_volume_listen_port',
default=8776, default=8776,
help='port for os volume api to listen') help='port for os volume api to listen'),
cfg.IntOpt('osapi_volume_workers',
default=0,
help='Number of workers for OpenStack Volume API service')
] ]
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
@@ -135,14 +147,6 @@ class Launcher(object):
:returns: None :returns: None
""" """
def sigterm(sig, frame):
LOG.audit(_("SIGTERM received"))
# NOTE(jk0): Raise a ^C which is caught by the caller and cleanly
# shuts down the service. This does not yet handle eventlet
# threads.
raise KeyboardInterrupt
signal.signal(signal.SIGTERM, sigterm)
for service in self._services: for service in self._services:
try: try:
@@ -362,10 +366,12 @@ class WSGIService(object):
self.app = self.loader.load_app(name) self.app = self.loader.load_app(name)
self.host = getattr(FLAGS, '%s_listen' % name, "0.0.0.0") self.host = getattr(FLAGS, '%s_listen' % name, "0.0.0.0")
self.port = getattr(FLAGS, '%s_listen_port' % name, 0) self.port = getattr(FLAGS, '%s_listen_port' % name, 0)
self.workers = getattr(FLAGS, '%s_workers' % name, 0)
self.server = wsgi.Server(name, self.server = wsgi.Server(name,
self.app, self.app,
host=self.host, host=self.host,
port=self.port) port=self.port,
workers=self.workers)
def _get_manager(self): def _get_manager(self):
"""Initialize a Manager object appropriate for this service. """Initialize a Manager object appropriate for this service.

View File

@@ -366,5 +366,5 @@ def run():
if __name__ == '__main__': if __name__ == '__main__':
eventlet.monkey_patch() eventlet.monkey_patch(os=False)
run() run()

View File

@@ -63,6 +63,7 @@ LOG = logging.getLogger(__name__)
ISO_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S" ISO_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
PERFECT_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%f" PERFECT_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%f"
FLAGS = flags.FLAGS FLAGS = flags.FLAGS
RESEED = True
FLAGS.register_opt( FLAGS.register_opt(
cfg.BoolOpt('disable_process_locking', default=False, cfg.BoolOpt('disable_process_locking', default=False,
@@ -290,6 +291,13 @@ def debug(arg):
def generate_uid(topic, size=8): def generate_uid(topic, size=8):
global RESEED
if RESEED:
random.seed("%d%s%s" % (os.getpid(),
socket.gethostname(),
time.time()))
RESEED = False
characters = '01234567890abcdefghijklmnopqrstuvwxyz' characters = '01234567890abcdefghijklmnopqrstuvwxyz'
choices = [random.choice(characters) for _x in xrange(size)] choices = [random.choice(characters) for _x in xrange(size)]
return '%s-%s' % (topic, ''.join(choices)) return '%s-%s' % (topic, ''.join(choices))

View File

@@ -19,12 +19,16 @@
"""Utility methods for working with WSGI servers.""" """Utility methods for working with WSGI servers."""
import os.path import errno
import os
import signal
import sys import sys
import eventlet import eventlet
from eventlet.green import socket
import eventlet.wsgi import eventlet.wsgi
import greenlet import greenlet
import multiprocessing
from paste import deploy from paste import deploy
import routes.middleware import routes.middleware
import webob.dec import webob.dec
@@ -45,14 +49,15 @@ class Server(object):
default_pool_size = 1000 default_pool_size = 1000
def __init__(self, name, app, host=None, port=None, pool_size=None, def __init__(self, name, app, host=None, port=None, workers=None,
protocol=eventlet.wsgi.HttpProtocol): pool_size=None, protocol=eventlet.wsgi.HttpProtocol):
"""Initialize, but do not start, a WSGI server. """Initialize, but do not start, a WSGI server.
:param name: Pretty name for logging. :param name: Pretty name for logging.
:param app: The WSGI application to serve. :param app: The WSGI application to serve.
:param host: IP address to serve the application. :param host: IP address to serve the application.
:param port: Port number to server the application. :param port: Port number to server the application.
:param workers: Number of process to spawn concurrently
:param pool_size: Maximum number of eventlets to spawn concurrently. :param pool_size: Maximum number of eventlets to spawn concurrently.
:returns: None :returns: None
@@ -61,12 +66,17 @@ class Server(object):
self.app = app self.app = app
self.host = host or "0.0.0.0" self.host = host or "0.0.0.0"
self.port = port or 0 self.port = port or 0
self.workers = workers or 0
self._server = None self._server = None
self._socket = None self._socket = None
self._protocol = protocol self._protocol = protocol
self._pool = eventlet.GreenPool(pool_size or self.default_pool_size) self._pool_size = pool_size or self.default_pool_size
self._logger = logging.getLogger("eventlet.wsgi.server") self._pool = None
self._logger = logging.getLogger("nova.%s.wsgi.server" % self.name)
self._wsgi_logger = logging.WritableLogger(self._logger) self._wsgi_logger = logging.WritableLogger(self._logger)
self.master_worker = None
self.children = []
self.running = True
def _start(self): def _start(self):
"""Run the blocking eventlet WSGI server. """Run the blocking eventlet WSGI server.
@@ -90,11 +100,124 @@ class Server(object):
""" """
if backlog < 1: if backlog < 1:
raise exception.InvalidInput( raise exception.InvalidInput(
reason='The backlog must be more than 1') reason='The backlog must be more than 1')
self._socket = eventlet.listen((self.host, self.port), backlog=backlog)
self._server = eventlet.spawn(self._start) try:
(self.host, self.port) = self._socket.getsockname() self._socket = eventlet.listen((self.host, self.port),
LOG.info(_("Started %(name)s on %(host)s:%(port)s") % self.__dict__) backlog=backlog)
(self.host, self.port) = self._socket.getsockname()
except socket.error, err:
if err[0] != errno.EINVAL:
raise
if self.workers == 0:
# single process mode, useful for profiling, test, debug etc.
self._pool = eventlet.GreenPool(self._pool_size)
self._server = self._pool.spawn(self._start)
LOG.info(_("Started %(name)s on %(host)s:%(port)s") %
self.__dict__)
return None
# master_worker doesn't actually do work (i.e. handle API request)
# but it's a managing process to handle signal/termination for
# this type of API service, only needed if workers > 1
self.master_worker = multiprocessing.Process(target=self.run_workers,
args=())
self.master_worker.start()
self._logger.info(_("Started %(name)s on %(host)s:%(port)s in process")
% self.__dict__)
return None
def run_server_in_process(self):
"""Run a WSGI server."""
eventlet.wsgi.HttpProtocol.default_request_version = "HTTP/1.0"
eventlet.hubs.use_hub('poll')
eventlet.patcher.monkey_patch(all=False, socket=True)
self._pool = eventlet.GreenPool(size=self._pool_size)
try:
self._pool.spawn_n(self._start)
except socket.error, err:
if err[0] != errno.EINVAL:
raise
self._pool.waitall()
def run_workers(self):
"""Start workers and wait for them to join"""
def kill_children(*args):
"""Kills the entire process group."""
#TODO(zhiteng) Gracefully kill all eventlet greenthread
self._logger.error(_('SIGTERM or SIGINT received'))
signal.signal(signal.SIGTERM, signal.SIG_IGN)
signal.signal(signal.SIGINT, signal.SIG_IGN)
self.running = False
for pid in self.children:
os.kill(pid, signal.SIGTERM)
def hup(*args):
"""
Shuts down the server, but allows running requests to complete
"""
self._logger.error(_('SIGHUP received'))
signal.signal(signal.SIGHUP, signal.SIG_IGN)
self.running = False
signal.signal(signal.SIGTERM, kill_children)
signal.signal(signal.SIGINT, kill_children)
signal.signal(signal.SIGHUP, hup)
while len(self.children) < self.workers:
self.run_child()
self._logger.info(_("Started %(children_count)d worker for %(name)s")
% {'children_count': len(self.children),
'name': self.name})
self.wait_on_children()
def run_child(self):
try:
pid = os.fork()
except KeyboardInterrupt:
pass
if pid == 0:
signal.signal(signal.SIGHUP, signal.SIG_DFL)
signal.signal(signal.SIGTERM, signal.SIG_DFL)
# to avoid race condition that child receive signal before
# parent and is respawned
signal.signal(signal.SIGINT, signal.SIG_IGN)
try:
self.run_server_in_process()
except KeyboardInterrupt:
pass
self._logger.info(_('Child %d exiting normally') % os.getpid())
return None
else:
self._logger.info(_('[%(name)s] Started worker (pid: %(pid)s)') %
{'name': self.name,
'pid': pid})
self.children.append(pid)
def wait_on_children(self):
while self.running:
try:
pid, status = os.wait()
if os.WIFEXITED(status) or os.WIFSIGNALED(status):
self._logger.error(_('Dead worker %(pid)s') % locals())
if pid in self.children:
self.children.remove(pid)
self.run_child()
except OSError, err:
if err.errno not in (errno.EINTR, errno.ECHILD):
raise
except KeyboardInterrupt:
self._logger.info(_('Caught keyboard interrupt. Exiting.'))
self.running = False
break
eventlet.greenio.shutdown_safe(self._socket)
self._socket.close()
self._logger.debug(_('Exited'))
def stop(self): def stop(self):
"""Stop this server. """Stop this server.
@@ -106,7 +229,19 @@ class Server(object):
""" """
LOG.info(_("Stopping WSGI server.")) LOG.info(_("Stopping WSGI server."))
self._server.kill() if self.workers > 0:
# set running state to false and kill all workers
self.running = False
for pid in self.children:
os.kill(pid, signal.SIGTERM)
self.children.remove(pid)
# now terminate master_worker
if self.master_worker.is_alive():
self.master_worker.terminate()
else:
# Resize Pool to stop accepting new connection
self._pool.resize(0)
self._server.kill()
def wait(self): def wait(self):
"""Block, until the server has stopped. """Block, until the server has stopped.
@@ -117,9 +252,16 @@ class Server(object):
""" """
try: try:
self._server.wait() if self.workers and self.master_worker:
# for services enabled multi-process,a separate master_worker
# is already waiting
pass
else:
self._pool.waitall()
except greenlet.GreenletExit: except greenlet.GreenletExit:
LOG.info(_("WSGI server has stopped.")) LOG.info(_("WSGI server has stopped."))
except KeyboardInterrupt:
pass
class Request(webob.Request): class Request(webob.Request):