swift/swift/common/wsgi.py

514 lines
19 KiB
Python

# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""WSGI tools for use with swift."""
import errno
import os
import signal
import time
import mimetools
from swift import gettext_ as _
from itertools import chain
from StringIO import StringIO
import eventlet
import eventlet.debug
from eventlet import greenio, GreenPool, sleep, wsgi, listen
from paste.deploy import loadwsgi
from eventlet.green import socket, ssl
from urllib import unquote
from swift.common import utils
from swift.common.swob import Request
from swift.common.utils import capture_stdio, disable_fallocate, \
drop_privileges, get_logger, NullLogger, config_true_value, \
validate_configuration, get_hub, config_auto_int_value
try:
import multiprocessing
CPU_COUNT = multiprocessing.cpu_count() or 1
except (ImportError, NotImplementedError):
CPU_COUNT = 1
class NamedConfigLoader(loadwsgi.ConfigLoader):
"""
Patch paste.deploy's ConfigLoader so each context object will know what
config section it came from.
"""
def get_context(self, object_type, name=None, global_conf=None):
context = super(NamedConfigLoader, self).get_context(
object_type, name=name, global_conf=global_conf)
context.name = name
return context
loadwsgi.ConfigLoader = NamedConfigLoader
class ConfigDirLoader(NamedConfigLoader):
"""
Read configuration from multiple files under the given path.
"""
def __init__(self, conf_dir):
# parent class uses filename attribute when building error messages
self.filename = conf_dir = conf_dir.strip()
defaults = {
'here': os.path.normpath(os.path.abspath(conf_dir)),
'__file__': os.path.abspath(conf_dir)
}
self.parser = loadwsgi.NicerConfigParser(conf_dir, defaults=defaults)
self.parser.optionxform = str # Don't lower-case keys
utils.read_conf_dir(self.parser, conf_dir)
def _loadconfigdir(object_type, uri, path, name, relative_to, global_conf):
if relative_to:
path = os.path.normpath(os.path.join(relative_to, path))
loader = ConfigDirLoader(path)
if global_conf:
loader.update_defaults(global_conf, overwrite=False)
return loader.get_context(object_type, name, global_conf)
# add config_dir parsing to paste.deploy
loadwsgi._loaders['config_dir'] = _loadconfigdir
def wrap_conf_type(f):
"""
Wrap a function whos first argument is a paste.deploy style config uri,
such that you can pass it an un-adorned raw filesystem path and the config
directive (either config: or config_dir:) will be added automatically
based on the type of filesystem entity at the given path (either a file or
directory) before passing it through to the paste.deploy function.
"""
def wrapper(conf_path, *args, **kwargs):
if os.path.isdir(conf_path):
conf_type = 'config_dir'
else:
conf_type = 'config'
conf_uri = '%s:%s' % (conf_type, conf_path)
return f(conf_uri, *args, **kwargs)
return wrapper
appconfig = wrap_conf_type(loadwsgi.appconfig)
loadapp = wrap_conf_type(loadwsgi.loadapp)
def monkey_patch_mimetools():
"""
mimetools.Message defaults content-type to "text/plain"
This changes it to default to None, so we can detect missing headers.
"""
orig_parsetype = mimetools.Message.parsetype
def parsetype(self):
if not self.typeheader:
self.type = None
self.maintype = None
self.subtype = None
self.plisttext = ''
else:
orig_parsetype(self)
mimetools.Message.parsetype = parsetype
def get_socket(conf, default_port=8080):
"""Bind socket to bind ip:port in conf
:param conf: Configuration dict to read settings from
:param default_port: port to use if not specified in conf
:returns : a socket object as returned from socket.listen or
ssl.wrap_socket if conf specifies cert_file
"""
bind_addr = (conf.get('bind_ip', '0.0.0.0'),
int(conf.get('bind_port', default_port)))
address_family = [addr[0] for addr in socket.getaddrinfo(
bind_addr[0], bind_addr[1], socket.AF_UNSPEC, socket.SOCK_STREAM)
if addr[0] in (socket.AF_INET, socket.AF_INET6)][0]
sock = None
bind_timeout = int(conf.get('bind_timeout', 30))
retry_until = time.time() + bind_timeout
warn_ssl = False
while not sock and time.time() < retry_until:
try:
sock = listen(bind_addr, backlog=int(conf.get('backlog', 4096)),
family=address_family)
if 'cert_file' in conf:
warn_ssl = True
sock = ssl.wrap_socket(sock, certfile=conf['cert_file'],
keyfile=conf['key_file'])
except socket.error as err:
if err.args[0] != errno.EADDRINUSE:
raise
sleep(0.1)
if not sock:
raise Exception(_('Could not bind to %s:%s '
'after trying for %s seconds') % (
bind_addr[0], bind_addr[1], bind_timeout))
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
# in my experience, sockets can hang around forever without keepalive
sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
if hasattr(socket, 'TCP_KEEPIDLE'):
sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 600)
if warn_ssl:
ssl_warning_message = _('WARNING: SSL should only be enabled for '
'testing purposes. Use external SSL '
'termination for a production deployment.')
get_logger(conf).warning(ssl_warning_message)
print(ssl_warning_message)
return sock
class RestrictedGreenPool(GreenPool):
"""
Works the same as GreenPool, but if the size is specified as one, then the
spawn_n() method will invoke waitall() before returning to prevent the
caller from doing any other work (like calling accept()).
"""
def __init__(self, size=1024):
super(RestrictedGreenPool, self).__init__(size=size)
self._rgp_do_wait = (size == 1)
def spawn_n(self, *args, **kwargs):
super(RestrictedGreenPool, self).spawn_n(*args, **kwargs)
if self._rgp_do_wait:
self.waitall()
def run_server(conf, logger, sock):
# Ensure TZ environment variable exists to avoid stat('/etc/localtime') on
# some platforms. This locks in reported times to the timezone in which
# the server first starts running in locations that periodically change
# timezones.
os.environ['TZ'] = time.strftime("%z", time.gmtime())
wsgi.HttpProtocol.default_request_version = "HTTP/1.0"
# Turn off logging requests by the underlying WSGI software.
wsgi.HttpProtocol.log_request = lambda *a: None
# Redirect logging other messages by the underlying WSGI software.
wsgi.HttpProtocol.log_message = \
lambda s, f, *a: logger.error('ERROR WSGI: ' + f % a)
wsgi.WRITE_TIMEOUT = int(conf.get('client_timeout') or 60)
eventlet.hubs.use_hub(get_hub())
eventlet.patcher.monkey_patch(all=False, socket=True)
eventlet_debug = config_true_value(conf.get('eventlet_debug', 'no'))
eventlet.debug.hub_exceptions(eventlet_debug)
# utils.LogAdapter stashes name in server; fallback on unadapted loggers
if hasattr(logger, 'server'):
log_name = logger.server
else:
log_name = logger.name
app = loadapp(conf['__file__'], global_conf={'log_name': log_name})
max_clients = int(conf.get('max_clients', '1024'))
pool = RestrictedGreenPool(size=max_clients)
try:
wsgi.server(sock, app, NullLogger(), custom_pool=pool)
except socket.error as err:
if err[0] != errno.EINVAL:
raise
pool.waitall()
#TODO(clayg): pull more pieces of this to test more
def run_wsgi(conf_path, app_section, *args, **kwargs):
"""
Runs the server using the specified number of workers.
:param conf_path: Path to paste.deploy style configuration file/directory
:param app_section: App name from conf file to load config from
"""
# Load configuration, Set logger and Load request processor
try:
(conf, logger, log_name) = \
_initrp(conf_path, app_section, *args, **kwargs)
except ConfigFileError as e:
print e
return
# bind to address and port
sock = get_socket(conf, default_port=kwargs.get('default_port', 8080))
# remaining tasks should not require elevated privileges
drop_privileges(conf.get('user', 'swift'))
# Ensure the application can be loaded before proceeding.
loadapp(conf_path, global_conf={'log_name': log_name})
# set utils.FALLOCATE_RESERVE if desired
reserve = int(conf.get('fallocate_reserve', 0))
if reserve > 0:
utils.FALLOCATE_RESERVE = reserve
# redirect errors to logger and close stdio
capture_stdio(logger)
worker_count = config_auto_int_value(conf.get('workers'), CPU_COUNT)
# Useful for profiling [no forks].
if worker_count == 0:
run_server(conf, logger, sock)
return
def kill_children(*args):
"""Kills the entire process group."""
logger.error('SIGTERM received')
signal.signal(signal.SIGTERM, signal.SIG_IGN)
running[0] = False
os.killpg(0, signal.SIGTERM)
def hup(*args):
"""Shuts down the server, but allows running requests to complete"""
logger.error('SIGHUP received')
signal.signal(signal.SIGHUP, signal.SIG_IGN)
running[0] = False
running = [True]
signal.signal(signal.SIGTERM, kill_children)
signal.signal(signal.SIGHUP, hup)
children = []
while running[0]:
while len(children) < worker_count:
pid = os.fork()
if pid == 0:
signal.signal(signal.SIGHUP, signal.SIG_DFL)
signal.signal(signal.SIGTERM, signal.SIG_DFL)
run_server(conf, logger, sock)
logger.notice('Child %d exiting normally' % os.getpid())
return
else:
logger.notice('Started child %s' % pid)
children.append(pid)
try:
pid, status = os.wait()
if os.WIFEXITED(status) or os.WIFSIGNALED(status):
logger.error('Removing dead child %s' % pid)
children.remove(pid)
except OSError as err:
if err.errno not in (errno.EINTR, errno.ECHILD):
raise
except KeyboardInterrupt:
logger.notice('User quit')
break
greenio.shutdown_safe(sock)
sock.close()
logger.notice('Exited')
class ConfigFileError(Exception):
pass
def _initrp(conf_path, app_section, *args, **kwargs):
try:
conf = appconfig(conf_path, name=app_section)
except Exception as e:
raise ConfigFileError("Error trying to load config from %s: %s" %
(conf_path, e))
validate_configuration()
# pre-configure logger
log_name = conf.get('log_name', app_section)
if 'logger' in kwargs:
logger = kwargs.pop('logger')
else:
logger = get_logger(conf, log_name,
log_to_console=kwargs.pop('verbose', False),
log_route='wsgi')
# disable fallocate if desired
if config_true_value(conf.get('disable_fallocate', 'no')):
disable_fallocate()
monkey_patch_mimetools()
return (conf, logger, log_name)
def init_request_processor(conf_path, app_section, *args, **kwargs):
"""
Loads common settings from conf
Sets the logger
Loads the request processor
:param conf_path: Path to paste.deploy style configuration file/directory
:param app_section: App name from conf file to load config from
:returns: the loaded application entry point
:raises ConfigFileError: Exception is raised for config file error
"""
(conf, logger, log_name) = _initrp(conf_path, app_section, *args, **kwargs)
app = loadapp(conf_path, global_conf={'log_name': log_name})
return (app, conf, logger, log_name)
class WSGIContext(object):
"""
This class provides a means to provide context (scope) for a middleware
filter to have access to the wsgi start_response results like the request
status and headers.
"""
def __init__(self, wsgi_app):
self.app = wsgi_app
def _start_response(self, status, headers, exc_info=None):
"""
Saves response info without sending it to the remote client.
Uses the same semantics as the usual WSGI start_response.
"""
self._response_status = status
self._response_headers = headers
self._response_exc_info = exc_info
def _app_call(self, env):
"""
Ensures start_response has been called before returning.
"""
self._response_status = None
self._response_headers = None
self._response_exc_info = None
resp = self.app(env, self._start_response)
# if start_response has been called, just return the iter
if self._response_status is not None:
return resp
resp = iter(resp)
try:
first_chunk = resp.next()
except StopIteration:
return iter([])
else: # We got a first_chunk
return chain([first_chunk], resp)
def _get_status_int(self):
"""
Returns the HTTP status int from the last called self._start_response
result.
"""
return int(self._response_status.split(' ', 1)[0])
def _response_header_value(self, key):
"Returns str of value for given header key or None"
for h_key, val in self._response_headers:
if h_key.lower() == key.lower():
return val
return None
def make_pre_authed_request(env, method=None, path=None, body=None,
headers=None, agent='Swift', swift_source=None):
"""
Makes a new swob.Request based on the current env but with the
parameters specified. Note that this request will be preauthorized.
:param env: The WSGI environment to base the new request on.
:param method: HTTP method of new request; default is from
the original env.
:param path: HTTP path of new request; default is from the
original env. path should be compatible with what you
would send to Request.blank. path should be quoted and it
can include a query string. for example:
'/a%20space?unicode_str%E8%AA%9E=y%20es'
:param body: HTTP body of new request; empty by default.
:param headers: Extra HTTP headers of new request; None by
default.
:param agent: The HTTP user agent to use; default 'Swift'. You
can put %(orig)s in the agent to have it replaced
with the original env's HTTP_USER_AGENT, such as
'%(orig)s StaticWeb'. You also set agent to None to
use the original env's HTTP_USER_AGENT or '' to
have no HTTP_USER_AGENT.
:param swift_source: Used to mark the request as originating out of
middleware. Will be logged in proxy logs.
:returns: Fresh swob.Request object.
"""
query_string = None
path = path or ''
if path and '?' in path:
path, query_string = path.split('?', 1)
newenv = make_pre_authed_env(env, method, path=unquote(path), agent=agent,
query_string=query_string,
swift_source=swift_source)
if not headers:
headers = {}
if body:
return Request.blank(path, environ=newenv, body=body, headers=headers)
else:
return Request.blank(path, environ=newenv, headers=headers)
def make_pre_authed_env(env, method=None, path=None, agent='Swift',
query_string=None, swift_source=None):
"""
Returns a new fresh WSGI environment with escalated privileges to
do backend checks, listings, etc. that the remote user wouldn't
be able to accomplish directly.
:param env: The WSGI environment to base the new environment on.
:param method: The new REQUEST_METHOD or None to use the
original.
:param path: The new path_info or none to use the original. path
should NOT be quoted. When building a url, a Webob
Request (in accordance with wsgi spec) will quote
env['PATH_INFO']. url += quote(environ['PATH_INFO'])
:param query_string: The new query_string or none to use the original.
When building a url, a Webob Request will append
the query string directly to the url.
url += '?' + env['QUERY_STRING']
:param agent: The HTTP user agent to use; default 'Swift'. You
can put %(orig)s in the agent to have it replaced
with the original env's HTTP_USER_AGENT, such as
'%(orig)s StaticWeb'. You also set agent to None to
use the original env's HTTP_USER_AGENT or '' to
have no HTTP_USER_AGENT.
:param swift_source: Used to mark the request as originating out of
middleware. Will be logged in proxy logs.
:returns: Fresh WSGI environment.
"""
newenv = {}
for name in ('eventlet.posthooks', 'HTTP_USER_AGENT', 'HTTP_HOST',
'PATH_INFO', 'QUERY_STRING', 'REMOTE_USER', 'REQUEST_METHOD',
'SCRIPT_NAME', 'SERVER_NAME', 'SERVER_PORT',
'SERVER_PROTOCOL', 'swift.cache', 'swift.source',
'swift.trans_id'):
if name in env:
newenv[name] = env[name]
if method:
newenv['REQUEST_METHOD'] = method
if path:
newenv['PATH_INFO'] = path
newenv['SCRIPT_NAME'] = ''
if query_string is not None:
newenv['QUERY_STRING'] = query_string
if agent:
newenv['HTTP_USER_AGENT'] = (
agent % {'orig': env.get('HTTP_USER_AGENT', '')}).strip()
elif agent == '' and 'HTTP_USER_AGENT' in newenv:
del newenv['HTTP_USER_AGENT']
if swift_source:
newenv['swift.source'] = swift_source
newenv['swift.authorize'] = lambda req: None
newenv['swift.authorize_override'] = True
newenv['REMOTE_USER'] = '.wsgi.pre_authed'
newenv['wsgi.input'] = StringIO('')
if 'SCRIPT_NAME' not in newenv:
newenv['SCRIPT_NAME'] = ''
return newenv