Seamlessly reload servers with SIGUSR1

Swift servers can now be seamlessly reloaded by sending them a SIGUSR1 (instead of a SIGHUP). The server forks off a synchronized child to wait to close the old listen socket(s) until the new server has started up and bound its listen socket(s). The new server is exec'ed from the old one so its PID doesn't change. This makes Systemd happier, so a ReloadExec= stanza can now be used. The seamless part means that incoming connections will alwyas get accepted either by the old server or the new one. This eliminates client-perceived "downtime" during server reloads, while allowing the server to fully reload, re-reading configuration, becoming a fresh Python interpreter instance, etc. The SO_REUSEPORT socket option has already been getting used, so nothing had to change there. This patch also includes a non-invasive fix for a current eventlet bug; see https://github.com/eventlet/eventlet/pull/590 That bug prevents a SIGHUP "reload" from properly servicing existing requests before old worker processes close sockets and exit. The existing probtests missed this, but the new ones, in this patch, caught it. New probe tests cover both old SIGHUP "reload" behavior as well as the new SIGUSR1 seamless reload behavior. Change-Id: I3e5229d2fb04be67e53533ff65b0870038accbb7
2019-10-25 12:48:36 -07:00
parent 281ffab2a5
commit 1107f24179
10 changed files with 549 additions and 168 deletions
--- a/doc/manpages/swift-init.1
+++ b/doc/manpages/swift-init.1
@@ -87,6 +87,7 @@ allows one to use the keywords such as "all", "main" and "rest" for the <server>
 .IP "\fIno-wait\fR: \t\t\t spawn server and return immediately"
 .IP "\fIonce\fR: \t\t\t start server and run one pass on supporting daemons"
 .IP "\fIreload\fR: \t\t\t graceful shutdown then restart on supporting servers"
+.IP "\fIreload-seamless\fR: \t\t reload supporting servers with no downtime"
 .IP "\fIrestart\fR: \t\t\t stops then restarts server"
 .IP "\fIshutdown\fR: \t\t allow current requests to finish on supporting servers"
 .IP "\fIstart\fR: \t\t\t starts a server"
--- a/doc/source/admin_guide.rst
+++ b/doc/source/admin_guide.rst
@@ -1362,20 +1362,29 @@ Swift services are generally managed with ``swift-init``. the general usage is
 ``swift-init <service> <command>``, where service is the Swift service to
 manage (for example object, container, account, proxy) and command is one of:

-==========  ===============================================
-Command     Description
----------  -----------------------------------------------
-start       Start the service
-stop        Stop the service
-restart     Restart the service
-shutdown    Attempt to gracefully shutdown the service
-reload      Attempt to gracefully restart the service
-==========  ===============================================
+===============  ===============================================
+Command          Description
+---------------  -----------------------------------------------
+start            Start the service
+stop             Stop the service
+restart          Restart the service
+shutdown         Attempt to gracefully shutdown the service
+reload           Attempt to gracefully restart the service
+reload-seamless  Attempt to seamlessly restart the service
+===============  ===============================================

-A graceful shutdown or reload will finish any current requests before
-completely stopping the old service.  There is also a special case of
-``swift-init all <command>``, which will run the command for all swift
-services.
+A graceful shutdown or reload will allow all server workers to finish any
+current requests before exiting.  The parent server process exits immediately.
+
+A seamless reload will make new configuration settings active, with no window
+where client requests fail due to there being no active listen socket.
+The parent server process will re-exec itself, retaining its existing PID.
+After the re-exec'ed parent server process binds its listen sockets, the old
+listen sockets are closed and old server workers finish any current requests
+before exiting.
+
+There is also a special case of ``swift-init all <command>``, which will run
+the command for all swift services.

 In cases where there are multiple configs for a service, a specific config
 can be managed with ``swift-init <service>.<config> <command>``.
--- a/swift/common/daemon.py
+++ b/swift/common/daemon.py
@@ -132,6 +132,7 @@ class DaemonStrategy(object):
    def setup(self, **kwargs):
        utils.validate_configuration()
        utils.drop_privileges(self.daemon.conf.get('user', 'swift'))
+        utils.clean_up_daemon_hygiene()
        utils.capture_stdio(self.logger, **kwargs)

        def kill_children(*args):
--- a/swift/common/manager.py
+++ b/swift/common/manager.py
@@ -46,6 +46,7 @@ REST_SERVERS = [s for s in ALL_SERVERS if s not in MAIN_SERVERS]
 # aliases mapping
 ALIASES = {'all': ALL_SERVERS, 'main': MAIN_SERVERS, 'rest': REST_SERVERS}
 GRACEFUL_SHUTDOWN_SERVERS = MAIN_SERVERS
+SEAMLESS_SHUTDOWN_SERVERS = MAIN_SERVERS
 START_ONCE_SERVERS = REST_SERVERS
 # These are servers that match a type (account-*, container-*, object-*) but
 # don't use that type-server.conf file and instead use their own.
@@ -365,6 +366,21 @@ class Manager(object):
            status += m.start(**kwargs)
        return status

+    @command
+    def reload_seamless(self, **kwargs):
+        """seamlessly re-exec, then shutdown of old listen sockets on
+           supporting servers
+        """
+        kwargs.pop('graceful', None)
+        kwargs['seamless'] = True
+        status = 0
+        for server in self.servers:
+            signaled_pids = server.stop(**kwargs)
+            if not signaled_pids:
+                print(_('No %s running') % server)
+                status += 1
+        return status
+
    @command
    def force_reload(self, **kwargs):
        """alias for reload
@@ -628,13 +644,17 @@ class Server(object):
        """Kill running pids

        :param graceful: if True, attempt SIGHUP on supporting servers
+        :param seamless: if True, attempt SIGUSR1 on supporting servers

        :returns: a dict mapping pids (ints) to pid_files (paths)

        """
        graceful = kwargs.get('graceful')
+        seamless = kwargs.get('seamless')
        if graceful and self.server in GRACEFUL_SHUTDOWN_SERVERS:
            sig = signal.SIGHUP
+        elif seamless and self.server in SEAMLESS_SHUTDOWN_SERVERS:
+            sig = signal.SIGUSR1
        else:
            sig = signal.SIGTERM
        return self.signal_pids(sig, **kwargs)
--- a/swift/common/utils.py
+++ b/swift/common/utils.py
@@ -2453,7 +2453,7 @@ def get_hub():
        return None


-def drop_privileges(user, call_setsid=True):
+def drop_privileges(user):
    """
    Sets the userid/groupid of the current process, get session leader, etc.

@@ -2466,11 +2466,13 @@ def drop_privileges(user, call_setsid=True):
    os.setgid(user[3])
    os.setuid(user[2])
    os.environ['HOME'] = user[5]
-    if call_setsid:
-        try:
-            os.setsid()
-        except OSError:
-            pass
+
+
+def clean_up_daemon_hygiene():
+    try:
+        os.setsid()
+    except OSError:
+        pass
    os.chdir('/')   # in case you need to rmdir on where you started the daemon
    os.umask(0o22)  # ensure files are created with the correct privileges

--- a/swift/common/wsgi.py
+++ b/swift/common/wsgi.py
@@ -18,15 +18,18 @@
 from __future__ import print_function

 import errno
+import fcntl
 import os
 import signal
-import time
 from swift import gettext_ as _
+import sys
 from textwrap import dedent
+import time

 import eventlet
 import eventlet.debug
-from eventlet import greenio, GreenPool, sleep, wsgi, listen, Timeout
+from eventlet import greenio, GreenPool, sleep, wsgi, listen, Timeout, \
+    websocket
 from paste.deploy import loadwsgi
 from eventlet.green import socket, ssl, os as green_os
 from io import BytesIO
@@ -42,10 +45,11 @@ from swift.common.swob import Request, wsgi_quote, wsgi_unquote, \
 from swift.common.utils import capture_stdio, disable_fallocate, \
    drop_privileges, get_logger, NullLogger, config_true_value, \
    validate_configuration, get_hub, config_auto_int_value, \
-    reiterate
+    reiterate, clean_up_daemon_hygiene

 SIGNUM_TO_NAME = {getattr(signal, n): n for n in dir(signal)
                  if n.startswith('SIG') and '_' not in n}
+NOTIFY_FD_ENV_KEY = '__SWIFT_SERVER_NOTIFY_FD'

 # Set maximum line size of message headers to be accepted.
 wsgi.MAX_HEADER_LINE = constraints.MAX_HEADER_SIZE
@@ -422,6 +426,13 @@ def load_app_config(conf_file):
 class SwiftHttpProtocol(wsgi.HttpProtocol):
    default_request_version = "HTTP/1.0"

+    def __init__(self, *args, **kwargs):
+        # See https://github.com/eventlet/eventlet/pull/590
+        self.pre_shutdown_bugfix_eventlet = not getattr(
+            websocket.WebSocketWSGI, '_WSGI_APP_ALWAYS_IDLE', None)
+        # Note this is not a new-style class, so super() won't work
+        wsgi.HttpProtocol.__init__(self, *args, **kwargs)
+
    def log_request(self, *a):
        """
        Turn off logging requests by the underlying WSGI software.
@@ -528,6 +539,23 @@ class SwiftHttpProtocol(wsgi.HttpProtocol):
                        b'HTTP/1.1 100 Continue\r\n'
            return environ

+    def _read_request_line(self):
+        # Note this is not a new-style class, so super() won't work
+        got = wsgi.HttpProtocol._read_request_line(self)
+        # See https://github.com/eventlet/eventlet/pull/590
+        if self.pre_shutdown_bugfix_eventlet:
+            self.conn_state[2] = wsgi.STATE_REQUEST
+        return got
+
+    def handle_one_request(self):
+        # Note this is not a new-style class, so super() won't work
+        got = wsgi.HttpProtocol.handle_one_request(self)
+        # See https://github.com/eventlet/eventlet/pull/590
+        if self.pre_shutdown_bugfix_eventlet:
+            if self.conn_state[2] != wsgi.STATE_CLOSE:
+                self.conn_state[2] = wsgi.STATE_IDLE
+        return got
+

 class SwiftHttpProxiedProtocol(SwiftHttpProtocol):
    """
@@ -662,7 +690,36 @@ def run_server(conf, logger, sock, global_conf=None):
    pool.waitall()


-class WorkersStrategy(object):
+class StrategyBase(object):
+    """
+    Some operations common to all strategy classes.
+    """
+
+    def shutdown_sockets(self):
+        """
+        Shutdown any listen sockets.
+        """
+
+        for sock in self.iter_sockets():
+            greenio.shutdown_safe(sock)
+            sock.close()
+
+    def set_close_on_exec_on_listen_sockets(self):
+        """
+        Set the close-on-exec flag on any listen sockets.
+        """
+
+        for sock in self.iter_sockets():
+            if six.PY2:
+                fcntl.fcntl(sock.fileno(), fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+            else:
+                # Python 3.4 and later default to sockets having close-on-exec
+                # set (what PEP 0446 calls "non-inheritable").  This new method
+                # on socket objects is provided to toggle it.
+                sock.set_inheritable(False)
+
+
+class WorkersStrategy(StrategyBase):
    """
    WSGI server management strategy object for a single bind port and listen
    socket shared by a configured number of forked-off workers.
@@ -695,8 +752,7 @@ class WorkersStrategy(object):

    def do_bind_ports(self):
        """
-        Bind the one listen socket for this strategy and drop privileges
-        (since the parent process will never need to bind again).
+        Bind the one listen socket for this strategy.
        """

        try:
@@ -705,7 +761,6 @@ class WorkersStrategy(object):
            msg = 'bind_port wasn\'t properly set in the config file. ' \
                'It must be explicitly set to a valid port number.'
            return msg
-        drop_privileges(self.conf.get('user', 'swift'))

    def no_fork_sock(self):
        """
@@ -766,20 +821,27 @@ class WorkersStrategy(object):
        """
        Called when a worker has exited.

+        NOTE: a re-exec'ed server can reap the dead worker PIDs from the old
+        server process that is being replaced as part of a service reload
+        (SIGUSR1).  So we need to be robust to getting some unknown PID here.
+
        :param int pid: The PID of the worker that exited.
        """

-        self.logger.error('Removing dead child %s from parent %s',
-                          pid, os.getpid())
-        self.children.remove(pid)
+        if pid in self.children:
+            self.logger.error('Removing dead child %s from parent %s',
+                              pid, os.getpid())
+            self.children.remove(pid)
+        else:
+            self.logger.info('Ignoring wait() result from unknown PID %s', pid)

-    def shutdown_sockets(self):
+    def iter_sockets(self):
        """
-        Shutdown any listen sockets.
+        Yields all known listen sockets.
        """

-        greenio.shutdown_safe(self.sock)
-        self.sock.close()
+        if self.sock:
+            yield self.sock


 class PortPidState(object):
@@ -901,7 +963,7 @@ class PortPidState(object):
        self.sock_data_by_port[dead_port]['pids'][server_idx] = None


-class ServersPerPortStrategy(object):
+class ServersPerPortStrategy(StrategyBase):
    """
    WSGI server management strategy object for an object-server with one listen
    port per unique local port in the storage policy rings.  The
@@ -948,28 +1010,13 @@ class ServersPerPortStrategy(object):

    def do_bind_ports(self):
        """
-        Bind one listen socket per unique local storage policy ring port.  Then
-        do all the work of drop_privileges except the actual dropping of
-        privileges (each forked-off worker will do that post-fork in
-        :py:meth:`post_fork_hook`).
+        Bind one listen socket per unique local storage policy ring port.
        """

        self._reload_bind_ports()
        for port in self.bind_ports:
            self._bind_port(port)

-        # The workers strategy drops privileges here, which we obviously cannot
-        # do if we want to support binding to low ports.  But we do want some
-        # of the actions that drop_privileges did.
-        try:
-            os.setsid()
-        except OSError:
-            pass
-        # In case you need to rmdir where you started the daemon:
-        os.chdir('/')
-        # Ensure files are created with the correct privileges:
-        os.umask(0o22)
-
    def no_fork_sock(self):
        """
        This strategy does not support running in the foreground.
@@ -1030,7 +1077,7 @@ class ServersPerPortStrategy(object):
        to drop privileges.
        """

-        drop_privileges(self.conf.get('user', 'swift'), call_setsid=False)
+        drop_privileges(self.conf.get('user', 'swift'))

    def log_sock_exit(self, sock, server_idx):
        """
@@ -1050,6 +1097,7 @@ class ServersPerPortStrategy(object):
                           :py:meth:`new_worker_socks`.
        :param int pid: The new worker process' PID
        """
+
        port = self.port_pid_state.port_for_sock(sock)
        self.logger.notice('Started child %d (PID %d) for port %d',
                           server_idx, pid, port)
@@ -1064,14 +1112,13 @@ class ServersPerPortStrategy(object):

        self.port_pid_state.forget_pid(pid)

-    def shutdown_sockets(self):
+    def iter_sockets(self):
        """
-        Shutdown any listen sockets.
+        Yields all known listen sockets.
        """

        for sock in self.port_pid_state.all_socks():
-            greenio.shutdown_safe(sock)
-            sock.close()
+            yield sock


 def run_wsgi(conf_path, app_section, *args, **kwargs):
@@ -1127,10 +1174,22 @@ def run_wsgi(conf_path, app_section, *args, **kwargs):
        print(error_msg)
        return 1

+    # Do some daemonization process hygene before we fork any children or run a
+    # server without forking.
+    clean_up_daemon_hygiene()
+
    # Redirect errors to logger and close stdio. Do this *after* binding ports;
    # we use this to signal that the service is ready to accept connections.
    capture_stdio(logger)

+    # If necessary, signal an old copy of us that it's okay to shutdown its
+    # listen sockets now because ours are up and ready to receive connections.
+    reexec_signal_fd = os.getenv(NOTIFY_FD_ENV_KEY)
+    if reexec_signal_fd:
+        reexec_signal_fd = int(reexec_signal_fd)
+        os.write(reexec_signal_fd, str(os.getpid()).encode('utf8'))
+        os.close(reexec_signal_fd)
+
    no_fork_sock = strategy.no_fork_sock()
    if no_fork_sock:
        run_server(conf, logger, no_fork_sock, global_conf=global_conf)
@@ -1145,6 +1204,7 @@ def run_wsgi(conf_path, app_section, *args, **kwargs):
    running_context = [True, None]
    signal.signal(signal.SIGTERM, stop_with_signal)
    signal.signal(signal.SIGHUP, stop_with_signal)
+    signal.signal(signal.SIGUSR1, stop_with_signal)

    while running_context[0]:
        for sock, sock_info in strategy.new_worker_socks():
@@ -1152,6 +1212,7 @@ def run_wsgi(conf_path, app_section, *args, **kwargs):
            if pid == 0:
                signal.signal(signal.SIGHUP, signal.SIG_DFL)
                signal.signal(signal.SIGTERM, signal.SIG_DFL)
+                signal.signal(signal.SIGUSR1, signal.SIG_DFL)
                strategy.post_fork_hook()
                run_server(conf, logger, sock)
                strategy.log_sock_exit(sock, sock_info)
@@ -1196,9 +1257,58 @@ def run_wsgi(conf_path, app_section, *args, **kwargs):
            logger.error('Stopping with unexpected signal %r' %
                         running_context[1])
        else:
-            logger.error('%s received', signame)
+            logger.error('%s received (%s)', signame, os.getpid())
    if running_context[1] == signal.SIGTERM:
        os.killpg(0, signal.SIGTERM)
+    elif running_context[1] == signal.SIGUSR1:
+        # set up a pipe, fork off a child to handle cleanup later,
+        # and rexec ourselves with an environment variable set which will
+        # indicate which fd (one of the pipe ends) to write a byte to
+        # to indicate listen socket setup is complete.  That will signal
+        # the forked-off child to complete its listen socket shutdown.
+        #
+        # NOTE: all strategies will now require the parent process to retain
+        # superuser privileges so that the re'execd process can bind a new
+        # socket to the configured IP & port(s).  We can't just reuse existing
+        # listen sockets because then the bind IP couldn't be changed.
+        #
+        # NOTE: we need to set all our listen sockets close-on-exec so the only
+        # open reference to those file descriptors will be in the forked-off
+        # child here who waits to shutdown the old server's listen sockets.  If
+        # the re-exec'ed server's old listen sockets aren't closed-on-exec,
+        # then the old server can't actually ever exit.
+        strategy.set_close_on_exec_on_listen_sockets()
+        read_fd, write_fd = os.pipe()
+        orig_server_pid = os.getpid()
+        child_pid = os.fork()
+        if child_pid:
+            # parent; set env var for fds and reexec ourselves
+            os.close(read_fd)
+            os.putenv(NOTIFY_FD_ENV_KEY, str(write_fd))
+            myself = os.path.realpath(sys.argv[0])
+            logger.info("Old server PID=%d re'execing as: %r",
+                        orig_server_pid, [myself] + list(sys.argv))
+            os.execv(myself, sys.argv)
+            logger.error('Somehow lived past os.execv()?!')
+            exit('Somehow lived past os.execv()?!')
+        elif child_pid == 0:
+            # child
+            os.close(write_fd)
+            logger.info('Old server temporary child PID=%d waiting for '
+                        "re-exec'ed PID=%d to signal readiness...",
+                        os.getpid(), orig_server_pid)
+            try:
+                got_pid = os.read(read_fd, 30)
+                logger.info('Old server temporary child PID=%d notified '
+                            'to shutdown old listen sockets by PID=%s',
+                            os.getpid(), got_pid)
+            except Exception as e:
+                logger.warning('Unexpected exception while reading from '
+                               'pipe:', exc_info=True)
+            try:
+                os.close(read_fd)
+            except Exception:
+                pass

    strategy.shutdown_sockets()
    signal.signal(signal.SIGTERM, signal.SIG_IGN)
--- a/test/probe/test_signals.py
+++ b/test/probe/test_signals.py
@@ -16,10 +16,14 @@

 import unittest

-import random
 from contextlib import contextmanager
-
 import eventlet
+import json
+import os
+import random
+import shutil
+import time
+from uuid import uuid4

 from six.moves import http_client as httplib

@@ -27,7 +31,7 @@ from swift.common.storage_policy import POLICIES
 from swift.common.ring import Ring
 from swift.common.manager import Manager

-from test.probe.common import resetswift
+from test.probe.common import resetswift, ReplProbeTest, client


 def putrequest(conn, method, path, headers):
@@ -39,77 +43,311 @@ def putrequest(conn, method, path, headers):
    conn.endheaders()


-class TestWSGIServerProcessHandling(unittest.TestCase):
+def get_server_and_worker_pids(manager, old_workers=None):
+    # Gets all the server parent pids, as well as the set of all worker PIDs
+    # (i.e. any PID whose PPID is in the set of parent pids).
+    server_pid_set = {pid for server in manager.servers
+                      for (_, pid) in server.iter_pid_files()}
+    children_pid_set = set()
+    old_worker_pid_set = set(old_workers or [])
+    all_pids = [int(f) for f in os.listdir('/proc') if f.isdigit()]
+    for pid in all_pids:
+        try:
+            with open('/proc/%d/status' % pid, 'r') as fh:
+                for line in fh:
+                    if line.startswith('PPid:\t'):
+                        ppid = int(line[6:])
+                        if ppid in server_pid_set or pid in old_worker_pid_set:
+                            children_pid_set.add(pid)
+                        break
+        except Exception:
+            # No big deal, a process could have exited since we listed /proc,
+            # so we just ignore errors
+            pass
+    return {'server': server_pid_set, 'worker': children_pid_set}
+
+
+def wait_for_pids(manager, callback, timeout=15, old_workers=None):
+    # Waits up to `timeout` seconds for the supplied callback to return True
+    # when passed in the manager's pid set.
+    start_time = time.time()
+
+    pid_sets = get_server_and_worker_pids(manager, old_workers=old_workers)
+    got = callback(pid_sets)
+    while not got and time.time() - start_time < timeout:
+        time.sleep(0.1)
+        pid_sets = get_server_and_worker_pids(manager, old_workers=old_workers)
+        got = callback(pid_sets)
+    if time.time() - start_time >= timeout:
+        raise AssertionError('timed out waiting for PID state; got %r' % (
+            pid_sets))
+    return pid_sets
+
+
+class TestWSGIServerProcessHandling(ReplProbeTest):
+    # Subclasses need to define SERVER_NAME
+    HAS_INFO = False
+    PID_TIMEOUT = 25

    def setUp(self):
-        resetswift()
+        super(TestWSGIServerProcessHandling, self).setUp()
+        self.container = 'container-%s' % uuid4()
+        client.put_container(self.url, self.token, self.container,
+                             headers={'X-Storage-Policy':
+                                      self.policy.name})
+        self.manager = Manager([self.SERVER_NAME])
+        for server in self.manager.servers:
+            self.assertTrue(server.get_running_pids,
+                            'No running PIDs for %s' % server.cmd)
+        self.starting_pids = get_server_and_worker_pids(self.manager)

-    def _check_reload(self, server_name, ip, port):
-        manager = Manager([server_name])
-        manager.start()
+    def assert4xx(self, resp):
+        self.assertEqual(resp.status // 100, 4)
+        got_body = resp.read()
+        try:
+            self.assertIn('resource could not be found', got_body)
+        except AssertionError:
+            self.assertIn('Invalid path: blah', got_body)

-        starting_pids = {pid for server in manager.servers
-                         for (_, pid) in server.iter_pid_files()}
+    def get_conn(self):
+        ip, port = self.get_ip_port()
+        return httplib.HTTPConnection('%s:%s' % (ip, port))

-        body = b'test' * 10
-        conn = httplib.HTTPConnection('%s:%s' % (ip, port))
+    def _check_reload(self):
+        conn = self.get_conn()
+        self.addCleanup(conn.close)

        # sanity request
-        putrequest(conn, 'PUT', 'blah',
-                   headers={'Content-Length': len(body)})
-        conn.send(body)
-        resp = conn.getresponse()
-        self.assertEqual(resp.status // 100, 4)
-        resp.read()
+        self.start_write_req(conn, 'sanity')
+        resp = self.finish_write_req(conn)
+        self.check_write_resp(resp)

-        # Start the request before reloading...
-        putrequest(conn, 'PUT', 'blah',
-                   headers={'Content-Length': len(body)})
+        if self.HAS_INFO:
+            self.check_info_value(8192)

-        manager.reload()
+        # Start another write request before reloading...
+        self.start_write_req(conn, 'across-reload')

-        post_reload_pids = {pid for server in manager.servers
-                            for (_, pid) in server.iter_pid_files()}
+        if self.HAS_INFO:
+            self.swap_configs()  # new server's max_header_size == 8191

-        # none of the pids we started with are being tracked after reload
-        msg = 'expected all pids from %r to have died, but found %r' % (
-            starting_pids, post_reload_pids)
-        self.assertFalse(starting_pids & post_reload_pids, msg)
+        self.do_reload()

-        # ... and make sure we can finish what we were doing, and even
-        # start part of a new request
-        conn.send(body)
-        resp = conn.getresponse()
-        self.assertEqual(resp.status // 100, 4)
-        # We can even read the body
-        self.assertTrue(resp.read())
+        wait_for_pids(self.manager, self.make_post_reload_pid_cb(),
+                      old_workers=self.starting_pids['worker'],
+                      timeout=self.PID_TIMEOUT)
+
+        # ... and make sure we can finish what we were doing
+        resp = self.finish_write_req(conn)
+        self.check_write_resp(resp)

        # After this, we're in a funny spot. With eventlet 0.22.0, the
        # connection's now closed, but with prior versions we could keep
        # going indefinitely. See https://bugs.launchpad.net/swift/+bug/1792615

-        # Close our connection, to make sure old eventlet shuts down
+        # Close our connections, to make sure old eventlet shuts down
        conn.close()

        # sanity
-        post_close_pids = {pid for server in manager.servers
-                           for (_, pid) in server.iter_pid_files()}
-        self.assertEqual(post_reload_pids, post_close_pids)
+        wait_for_pids(self.manager, self.make_post_close_pid_cb(),
+                      old_workers=self.starting_pids['worker'],
+                      timeout=self.PID_TIMEOUT)

-    def test_proxy_reload(self):
-        self._check_reload('proxy-server', 'localhost', 8080)
+        if self.HAS_INFO:
+            self.check_info_value(8191)

-    def test_object_reload(self):
+
+class OldReloadMixin(object):
+    def make_post_reload_pid_cb(self):
+        def _cb(post_reload_pids):
+            # We expect all old server PIDs to be gone, a new server present,
+            # and for there to be exactly 1 old worker PID plus additional new
+            # worker PIDs.
+            old_servers_dead = not (self.starting_pids['server'] &
+                                    post_reload_pids['server'])
+            one_old_worker = 1 == len(self.starting_pids['worker'] &
+                                      post_reload_pids['worker'])
+            new_workers_present = (post_reload_pids['worker'] -
+                                   self.starting_pids['worker'])
+            return (post_reload_pids['server'] and old_servers_dead and
+                    one_old_worker and new_workers_present)
+        return _cb
+
+    def make_post_close_pid_cb(self):
+        def _cb(post_close_pids):
+            # We expect all old server PIDs to be gone, a new server present,
+            # no old worker PIDs, and additional new worker PIDs.
+            old_servers_dead = not (self.starting_pids['server'] &
+                                    post_close_pids['server'])
+            old_workers_dead = not (self.starting_pids['worker'] &
+                                    post_close_pids['worker'])
+            new_workers_present = (post_close_pids['worker'] -
+                                   self.starting_pids['worker'])
+            return (post_close_pids['server'] and old_servers_dead and
+                    old_workers_dead and new_workers_present)
+        return _cb
+
+    def do_reload(self):
+        self.manager.reload()
+
+
+class SeamlessReloadMixin(object):
+    def make_post_reload_pid_cb(self):
+        def _cb(post_reload_pids):
+            # We expect all orig server PIDs to STILL BE PRESENT, no new server
+            # present, and for there to be exactly 1 old worker PID plus
+            # additional new worker PIDs.
+            same_servers = (self.starting_pids['server'] ==
+                            post_reload_pids['server'])
+            one_old_worker = 1 == len(self.starting_pids['worker'] &
+                                      post_reload_pids['worker'])
+            new_workers_present = (post_reload_pids['worker'] -
+                                   self.starting_pids['worker'])
+            return (post_reload_pids['server'] and same_servers and
+                    one_old_worker and new_workers_present)
+        return _cb
+
+    def make_post_close_pid_cb(self):
+        def _cb(post_close_pids):
+            # We expect all orig server PIDs to STILL BE PRESENT, no new server
+            # present, no old worker PIDs, and additional new worker PIDs.
+            same_servers = (self.starting_pids['server'] ==
+                            post_close_pids['server'])
+            old_workers_dead = not (self.starting_pids['worker'] &
+                                    post_close_pids['worker'])
+            new_workers_present = (post_close_pids['worker'] -
+                                   self.starting_pids['worker'])
+            return (post_close_pids['server'] and same_servers and
+                    old_workers_dead and new_workers_present)
+        return _cb
+
+    def do_reload(self):
+        self.manager.reload_seamless()
+
+
+class TestObjectServerReloadBase(TestWSGIServerProcessHandling):
+    SERVER_NAME = 'object'
+    PID_TIMEOUT = 35
+
+    def get_ip_port(self):
        policy = random.choice(list(POLICIES))
        policy.load_ring('/etc/swift')
-        node = random.choice(policy.object_ring.get_part_nodes(1))
-        self._check_reload('object', node['ip'], node['port'])
+        self.ring_node = random.choice(policy.object_ring.get_part_nodes(1))
+        return self.ring_node['ip'], self.ring_node['port']

-    def test_account_container_reload(self):
-        for server in ('account', 'container'):
-            ring = Ring('/etc/swift', ring_name=server)
-            node = random.choice(ring.get_part_nodes(1))
-            self._check_reload(server, node['ip'], node['port'])
+    def start_write_req(self, conn, suffix):
+        putrequest(conn, 'PUT', '/%s/123/%s/%s/blah-%s' % (
+            self.ring_node['device'], self.account, self.container, suffix),
+            headers={'X-Timestamp': str(time.time()),
+                     'Content-Type': 'application/octet-string',
+                     'Content-Length': len(self.BODY)})
+
+    def finish_write_req(self, conn):
+        conn.send(self.BODY)
+        return conn.getresponse()
+
+    def check_write_resp(self, resp):
+        got_body = resp.read()
+        self.assertEqual(resp.status // 100, 2, 'Got status %d; %r' %
+                         (resp.status, got_body))
+        self.assertEqual('', got_body)
+        return resp
+
+
+class TestObjectServerReload(OldReloadMixin, TestObjectServerReloadBase):
+    BODY = 'test-object' * 10
+
+    def test_object_reload(self):
+        self._check_reload()
+
+
+class TestObjectServerReloadSeamless(SeamlessReloadMixin,
+                                     TestObjectServerReloadBase):
+    BODY = 'test-object' * 10
+
+    def test_object_reload_seamless(self):
+        self._check_reload()
+
+
+class TestProxyServerReloadBase(TestWSGIServerProcessHandling):
+    SERVER_NAME = 'proxy-server'
+    HAS_INFO = True
+
+    def setUp(self):
+        super(TestProxyServerReloadBase, self).setUp()
+        self.swift_conf_path = '/etc/swift/swift.conf'
+        self.new_swift_conf_path = self.swift_conf_path + '.new'
+        self.saved_swift_conf_path = self.swift_conf_path + '.orig'
+        shutil.copy(self.swift_conf_path, self.saved_swift_conf_path)
+        shutil.copy(self.swift_conf_path, self.new_swift_conf_path)
+        with open(self.new_swift_conf_path, 'a+') as fh:
+            fh.seek(0, os.SEEK_END)
+            fh.write('\n[swift-constraints]\nmax_header_size = 8191\n')
+            fh.flush()
+
+    def tearDown(self):
+        shutil.move(self.saved_swift_conf_path, self.swift_conf_path)
+        try:
+            os.unlink(self.new_swift_conf_path)
+        except OSError:
+            pass
+        super(TestProxyServerReloadBase, self).tearDown()
+
+    def swap_configs(self):
+        shutil.copy(self.new_swift_conf_path, self.swift_conf_path)
+
+    def get_ip_port(self):
+        return 'localhost', 8080
+
+    def assertMaxHeaderSize(self, resp, exp_max_header_size):
+        self.assertEqual(resp.status // 100, 2)
+        info_dict = json.loads(resp.read())
+        self.assertEqual(exp_max_header_size,
+                         info_dict['swift']['max_header_size'])
+
+    def check_info_value(self, expected_value):
+        # show that we're talking to the original server with the default
+        # max_header_size == 8192
+        conn2 = self.get_conn()
+        putrequest(conn2, 'GET', '/info',
+                   headers={'Content-Length': '0',
+                            'Accept': 'application/json'})
+        conn2.send('')
+        resp = conn2.getresponse()
+        self.assertMaxHeaderSize(resp, expected_value)
+        conn2.close()
+
+    def start_write_req(self, conn, suffix):
+        putrequest(conn, 'PUT', '/v1/%s/%s/blah-%s' % (
+            self.account, self.container, suffix),
+            headers={'X-Auth-Token': self.token,
+                     'Content-Length': len(self.BODY)})
+
+    def finish_write_req(self, conn):
+        conn.send(self.BODY)
+        return conn.getresponse()
+
+    def check_write_resp(self, resp):
+        got_body = resp.read()
+        self.assertEqual(resp.status // 100, 2, 'Got status %d; %r' %
+                         (resp.status, got_body))
+        self.assertEqual('', got_body)
+        return resp
+
+
+class TestProxyServerReload(OldReloadMixin, TestProxyServerReloadBase):
+    BODY = 'proxy' * 10
+
+    def test_proxy_reload(self):
+        self._check_reload()
+
+
+class TestProxyServerReloadSeamless(SeamlessReloadMixin,
+                                    TestProxyServerReloadBase):
+    BODY = 'proxy-seamless' * 10
+
+    def test_proxy_reload_seamless(self):
+        self._check_reload()


@contextmanager
--- a/test/unit/common/test_utils.py
+++ b/test/unit/common/test_utils.py
@@ -2285,15 +2285,16 @@ log_name = %(yarr)s'''
        }
        self.assertEqual(conf, expected)

-    def _check_drop_privileges(self, mock_os, required_func_calls,
-                               call_setsid=True):
+    def test_drop_privileges(self):
+        required_func_calls = ('setgroups', 'setgid', 'setuid')
+        mock_os = MockOs(called_funcs=required_func_calls)
        user = getuser()
        user_data = pwd.getpwnam(user)
        self.assertFalse(mock_os.called_funcs)  # sanity check
        # over-ride os with mock
        with mock.patch('swift.common.utils.os', mock_os):
            # exercise the code
-            utils.drop_privileges(user, call_setsid=call_setsid)
+            utils.drop_privileges(user)

        for func in required_func_calls:
            self.assertIn(func, mock_os.called_funcs)
@@ -2302,34 +2303,41 @@ log_name = %(yarr)s'''
        self.assertEqual(groups, set(mock_os.called_funcs['setgroups'][0]))
        self.assertEqual(user_data[3], mock_os.called_funcs['setgid'][0])
        self.assertEqual(user_data[2], mock_os.called_funcs['setuid'][0])
-        self.assertEqual('/', mock_os.called_funcs['chdir'][0])
-        self.assertEqual(0o22, mock_os.called_funcs['umask'][0])

-    def test_drop_privileges(self):
-        required_func_calls = ('setgroups', 'setgid', 'setuid', 'setsid',
-                               'chdir', 'umask')
+    def test_drop_privileges_no_setgroups(self):
+        required_func_calls = ('geteuid', 'setgid', 'setuid')
        mock_os = MockOs(called_funcs=required_func_calls)
-        self._check_drop_privileges(mock_os, required_func_calls)
+        user = getuser()
+        user_data = pwd.getpwnam(user)
+        self.assertFalse(mock_os.called_funcs)  # sanity check
+        # over-ride os with mock
+        with mock.patch('swift.common.utils.os', mock_os):
+            # exercise the code
+            utils.drop_privileges(user)

-    def test_drop_privileges_setsid_error(self):
-        # OSError trying to get session leader
-        required_func_calls = ('setgroups', 'setgid', 'setuid', 'setsid',
-                               'chdir', 'umask')
-        mock_os = MockOs(called_funcs=required_func_calls,
-                         raise_funcs=('setsid',))
-        self._check_drop_privileges(mock_os, required_func_calls)
+        for func in required_func_calls:
+            self.assertIn(func, mock_os.called_funcs)
+        self.assertNotIn('setgroups', mock_os.called_funcs)
+        self.assertEqual(user_data[5], mock_os.environ['HOME'])
+        self.assertEqual(user_data[3], mock_os.called_funcs['setgid'][0])
+        self.assertEqual(user_data[2], mock_os.called_funcs['setuid'][0])

-    def test_drop_privileges_no_call_setsid(self):
-        required_func_calls = ('setgroups', 'setgid', 'setuid', 'chdir',
-                               'umask')
-        # OSError if trying to get session leader, but it shouldn't be called
+    def test_clean_up_daemon_hygene(self):
+        required_func_calls = ('chdir', 'umask')
+        # OSError if trying to get session leader, but setsid() OSError is
+        # ignored by the code under test.
        bad_func_calls = ('setsid',)
        mock_os = MockOs(called_funcs=required_func_calls,
                         raise_funcs=bad_func_calls)
-        self._check_drop_privileges(mock_os, required_func_calls,
-                                    call_setsid=False)
+        with mock.patch('swift.common.utils.os', mock_os):
+            # exercise the code
+            utils.clean_up_daemon_hygiene()
+        for func in required_func_calls:
+            self.assertIn(func, mock_os.called_funcs)
        for func in bad_func_calls:
-            self.assertNotIn(func, mock_os.called_funcs)
+            self.assertIn(func, mock_os.called_funcs)
+        self.assertEqual('/', mock_os.called_funcs['chdir'][0])
+        self.assertEqual(0o22, mock_os.called_funcs['umask'][0])

    @reset_logger_state
    def test_capture_stdio(self):
--- a/test/unit/common/test_wsgi.py
+++ b/test/unit/common/test_wsgi.py
@@ -836,7 +836,8 @@ class TestWSGI(unittest.TestCase):

        with mock.patch.object(wsgi, '_initrp', _initrp), \
                mock.patch.object(wsgi, 'get_socket'), \
-                mock.patch.object(wsgi, 'drop_privileges'), \
+                mock.patch.object(wsgi, 'drop_privileges') as _d_privs, \
+                mock.patch.object(wsgi, 'clean_up_daemon_hygiene') as _c_hyg, \
                mock.patch.object(wsgi, 'loadapp', _loadapp), \
                mock.patch.object(wsgi, 'capture_stdio'), \
                mock.patch.object(wsgi, 'run_server'), \
@@ -849,6 +850,10 @@ class TestWSGI(unittest.TestCase):
                                                           socket=True,
                                                           select=True,
                                                           thread=True)
+        # run_wsgi() no longer calls drop_privileges() in the parent process,
+        # just clean_up_deemon_hygene()
+        self.assertEqual([], _d_privs.mock_calls)
+        self.assertEqual([mock.call()], _c_hyg.mock_calls)

    @mock.patch('swift.common.wsgi.run_server')
    @mock.patch('swift.common.wsgi.WorkersStrategy')
@@ -1353,36 +1358,11 @@ class TestServersPerPortStrategy(unittest.TestCase):
            6006, self.strategy.port_pid_state.port_for_sock(self.s1))
        self.assertEqual(
            6007, self.strategy.port_pid_state.port_for_sock(self.s2))
-        self.assertEqual([mock.call()], self.mock_setsid.mock_calls)
-        self.assertEqual([mock.call('/')], self.mock_chdir.mock_calls)
-        self.assertEqual([mock.call(0o22)], self.mock_umask.mock_calls)
-
-    def test_bind_ports_ignores_setsid_errors(self):
-        self.mock_setsid.side_effect = OSError()
-        self.strategy.do_bind_ports()
-
-        self.assertEqual(set((6006, 6007)), self.strategy.bind_ports)
-        self.assertEqual([
-            mock.call({'workers': 100,  # ignored
-                       'user': 'bob',
-                       'swift_dir': '/jim/cricket',
-                       'ring_check_interval': '76',
-                       'bind_ip': '2.3.4.5',
-                       'bind_port': 6006}),
-            mock.call({'workers': 100,  # ignored
-                       'user': 'bob',
-                       'swift_dir': '/jim/cricket',
-                       'ring_check_interval': '76',
-                       'bind_ip': '2.3.4.5',
-                       'bind_port': 6007}),
-        ], self.mock_get_socket.mock_calls)
-        self.assertEqual(
-            6006, self.strategy.port_pid_state.port_for_sock(self.s1))
-        self.assertEqual(
-            6007, self.strategy.port_pid_state.port_for_sock(self.s2))
-        self.assertEqual([mock.call()], self.mock_setsid.mock_calls)
-        self.assertEqual([mock.call('/')], self.mock_chdir.mock_calls)
-        self.assertEqual([mock.call(0o22)], self.mock_umask.mock_calls)
+        # strategy binding no longer does clean_up_deemon_hygene() actions, the
+        # user of the strategy does.
+        self.assertEqual([], self.mock_setsid.mock_calls)
+        self.assertEqual([], self.mock_chdir.mock_calls)
+        self.assertEqual([], self.mock_umask.mock_calls)

    def test_no_fork_sock(self):
        self.assertIsNone(self.strategy.no_fork_sock())
@@ -1519,7 +1499,7 @@ class TestServersPerPortStrategy(unittest.TestCase):
        self.strategy.post_fork_hook()

        self.assertEqual([
-            mock.call('bob', call_setsid=False),
+            mock.call('bob'),
        ], self.mock_drop_privileges.mock_calls)

    def test_shutdown_sockets(self):
@@ -1555,6 +1535,9 @@ class TestWorkersStrategy(unittest.TestCase):
        patcher = mock.patch('swift.common.wsgi.drop_privileges')
        self.mock_drop_privileges = patcher.start()
        self.addCleanup(patcher.stop)
+        patcher = mock.patch('swift.common.wsgi.clean_up_daemon_hygiene')
+        self.mock_clean_up_daemon_hygene = patcher.start()
+        self.addCleanup(patcher.stop)

    def test_loop_timeout(self):
        # This strategy should sit in the green.os.wait() for a bit (to avoid
@@ -1569,9 +1552,10 @@ class TestWorkersStrategy(unittest.TestCase):
        self.assertEqual([
            mock.call(self.conf),
        ], self.mock_get_socket.mock_calls)
-        self.assertEqual([
-            mock.call('bob'),
-        ], self.mock_drop_privileges.mock_calls)
+        # strategy binding no longer drops privileges nor does
+        # clean_up_deemon_hygene() actions.
+        self.assertEqual([], self.mock_drop_privileges.mock_calls)
+        self.assertEqual([], self.mock_clean_up_daemon_hygene.mock_calls)

        self.mock_get_socket.side_effect = wsgi.ConfigFilePortError()

@@ -1643,9 +1627,16 @@ class TestWorkersStrategy(unittest.TestCase):
        self.assertEqual([
            mock.call.shutdown_safe(self.mock_get_socket.return_value),
        ], mock_greenio.mock_calls)
-        self.assertEqual([
-            mock.call.close(),
-        ], self.mock_get_socket.return_value.mock_calls)
+        if six.PY2:
+            self.assertEqual([
+                mock.call.__nonzero__(),
+                mock.call.close(),
+            ], self.mock_get_socket.return_value.mock_calls)
+        else:
+            self.assertEqual([
+                mock.call.__bool__(),
+                mock.call.close(),
+            ], self.mock_get_socket.return_value.mock_calls)

    def test_log_sock_exit(self):
        self.strategy.log_sock_exit('blahblah', 'blahblah')
--- a/tox.ini
+++ b/tox.ini
@@ -9,12 +9,13 @@ install_command = pip install -U {opts} {packages}
 setenv = VIRTUAL_ENV={envdir}
         NOSE_WITH_COVERAGE=1
         NOSE_COVER_BRANCHES=1
+         NOSE_COVER_HTML_DIR={toxinidir}/cover
 deps =
  -c{env:UPPER_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/master}
  -r{toxinidir}/requirements.txt
  -r{toxinidir}/test-requirements.txt
-commands = find . ( -type f -o -type l ) -name "*.py[co]" -delete
-           find . -type d -name "__pycache__" -delete
+commands = find {envdir} ( -type f -o -type l ) -name "*.py[co]" -delete
+           find {envdir} -type d -name "__pycache__" -delete
           nosetests {posargs:test/unit}
 whitelist_externals = find
                      rm