systemd: Send STOPPING/RELOADING notifications

See https://www.freedesktop.org/software/systemd/man/sd_notify.html#Description
for more information.

Note that this requires that we keep the NOTIFY_SOCKET env var
around for more than just the first READY message, so we want to be
careful about when we're sending the default "READY=1".

UpgradeImpact
=============
Since prior versions of Swift would unset the NOTIFY_SOCKET env var,
services must be fully restarted (rather than seamlessly reloaded) to
emit the new messages.

Related-Change: Ice224fc2a6ba0150be180955037c13fc90365479
Change-Id: I201734ae0d6232ecb1923e67864dd928f90b6586
This commit is contained in:
Tim Burke 2022-04-12 14:00:48 -07:00
parent 212525118c
commit 55f7833d86
8 changed files with 62 additions and 12 deletions

View File

@ -159,7 +159,7 @@ class DaemonStrategy(object):
except KeyboardInterrupt:
self.logger.notice('User quit')
finally:
self.cleanup()
self.cleanup(stopping=True)
self.running = False
def _fork(self, once, **kwargs):
@ -167,6 +167,8 @@ class DaemonStrategy(object):
if pid == 0:
signal.signal(signal.SIGHUP, signal.SIG_DFL)
signal.signal(signal.SIGTERM, signal.SIG_DFL)
# only MAINPID should be sending notifications
os.environ.pop('NOTIFY_SOCKET', None)
self.daemon.run(once, **kwargs)
@ -245,7 +247,15 @@ class DaemonStrategy(object):
self.daemon.post_multiprocess_run()
return 0
def cleanup(self):
def cleanup(self, stopping=False):
"""
Cleanup worker processes
:param stopping: if set, tell systemd we're stopping
"""
if stopping:
utils.systemd_notify(self.logger, "STOPPING=1")
for p in self.spawned_pids():
try:
os.kill(p, signal.SIGTERM)

View File

@ -6206,17 +6206,28 @@ def get_db_files(db_path):
return sorted(results)
def systemd_notify(logger=None):
def systemd_notify(logger=None, msg=b"READY=1"):
"""
Notify the service manager that started this process, if it is
systemd-compatible, that this process correctly started. To do so,
it communicates through a Unix socket stored in environment variable
NOTIFY_SOCKET. More information can be found in systemd documentation:
Send systemd-compatible notifications.
Notify the service manager that started this process, if it has set the
NOTIFY_SOCKET environment variable. For example, systemd will set this
when the unit has ``Type=notify``. More information can be found in
systemd documentation:
https://www.freedesktop.org/software/systemd/man/sd_notify.html
Common messages include::
READY=1
RELOADING=1
STOPPING=1
STATUS=<some string>
:param logger: a logger object
:param msg: the message to send
"""
msg = b'READY=1'
if not isinstance(msg, bytes):
msg = msg.encode('utf8')
notify_socket = os.getenv('NOTIFY_SOCKET')
if notify_socket:
if notify_socket.startswith('@'):
@ -6227,7 +6238,6 @@ def systemd_notify(logger=None):
try:
sock.connect(notify_socket)
sock.sendall(msg)
del os.environ['NOTIFY_SOCKET']
except EnvironmentError:
if logger:
logger.debug("Systemd notification failed", exc_info=True)

View File

@ -487,6 +487,8 @@ class StrategyBase(object):
capture_stdio(self.logger)
drop_privileges(self.conf.get('user', 'swift'))
del self.tracking_data # children don't need to track siblings
# only MAINPID should be sending systemd notifications
os.environ.pop('NOTIFY_SOCKET', None)
def shutdown_sockets(self):
"""
@ -888,6 +890,7 @@ def run_wsgi(conf_path, app_section, *args, **kwargs):
run_server(conf, logger, no_fork_sock, global_conf=global_conf,
ready_callback=strategy.signal_ready,
allow_modify_pipeline=allow_modify_pipeline)
systemd_notify(logger, "STOPPING=1")
return 0
def stop_with_signal(signum, *args):
@ -981,8 +984,10 @@ def run_wsgi(conf_path, app_section, *args, **kwargs):
else:
logger.notice('%s received (%s)', signame, os.getpid())
if running_context[1] == signal.SIGTERM:
systemd_notify(logger, "STOPPING=1")
os.killpg(0, signal.SIGTERM)
elif running_context[1] == signal.SIGUSR1:
systemd_notify(logger, "RELOADING=1")
# set up a pipe, fork off a child to handle cleanup later,
# and rexec ourselves with an environment variable set which will
# indicate which fd (one of the pipe ends) to write a byte to
@ -1041,6 +1046,9 @@ def run_wsgi(conf_path, app_section, *args, **kwargs):
os.close(read_fd)
except Exception:
pass
else:
# SIGHUP or, less likely, run in "once" mode
systemd_notify(logger, "STOPPING=1")
strategy.shutdown_sockets()
signal.signal(signal.SIGTERM, signal.SIG_IGN)

View File

@ -158,6 +158,7 @@ class ContainerUpdater(Daemon):
pid2filename[pid] = tmpfilename
else:
signal.signal(signal.SIGTERM, signal.SIG_DFL)
os.environ.pop('NOTIFY_SOCKET', None)
eventlet_monkey_patch()
self.no_changes = 0
self.successes = 0

View File

@ -368,6 +368,7 @@ class ObjectAuditor(Daemon):
return pid
else:
signal.signal(signal.SIGTERM, signal.SIG_DFL)
os.environ.pop('NOTIFY_SOCKET', None)
if zero_byte_fps:
kwargs['zero_byte_fps'] = self.conf_zero_byte_fps
if sleep_between_zbf_scanner:

View File

@ -381,6 +381,7 @@ class ObjectUpdater(Daemon):
pids.append(pid)
else:
signal.signal(signal.SIGTERM, signal.SIG_DFL)
os.environ.pop('NOTIFY_SOCKET', None)
eventlet_monkey_patch()
self.stats.reset()
forkbegin = time.time()

View File

@ -4052,7 +4052,16 @@ cluster_dfw1 = http://dfw1.host/v1/
m_socket.assert_called_once_with(socket.AF_UNIX, socket.SOCK_DGRAM)
m_sock.connect.assert_called_once_with('foobar')
m_sock.sendall.assert_called_once_with(b'READY=1')
self.assertNotIn('NOTIFY_SOCKET', os.environ)
# Still there, so we can send STOPPING/RELOADING messages
self.assertIn('NOTIFY_SOCKET', os.environ)
m_socket.reset_mock()
m_sock.reset_mock()
logger = debug_logger()
utils.systemd_notify(logger, "RELOADING=1")
m_socket.assert_called_once_with(socket.AF_UNIX, socket.SOCK_DGRAM)
m_sock.connect.assert_called_once_with('foobar')
m_sock.sendall.assert_called_once_with(b'RELOADING=1')
# Abstract notification socket
m_socket.reset_mock()
@ -4062,7 +4071,7 @@ cluster_dfw1 = http://dfw1.host/v1/
m_socket.assert_called_once_with(socket.AF_UNIX, socket.SOCK_DGRAM)
m_sock.connect.assert_called_once_with('\0foobar')
m_sock.sendall.assert_called_once_with(b'READY=1')
self.assertNotIn('NOTIFY_SOCKET', os.environ)
self.assertIn('NOTIFY_SOCKET', os.environ)
# Test logger with connection error
m_sock = mock.Mock(connect=mock.Mock(side_effect=EnvironmentError),
@ -4094,7 +4103,7 @@ cluster_dfw1 = http://dfw1.host/v1/
msg = sock.recv(512)
sock.close()
self.assertEqual(msg, b'READY=1')
self.assertNotIn('NOTIFY_SOCKET', os.environ)
self.assertIn('NOTIFY_SOCKET', os.environ)
# test file socket address
socket_path = os.path.join(tempdir, 'foobar')

View File

@ -976,6 +976,8 @@ class TestWSGI(unittest.TestCase, ConfigAssertMixin):
mock.patch.object(wsgi, 'loadapp', _loadapp), \
mock.patch.object(wsgi, 'capture_stdio'), \
mock.patch.object(wsgi, 'run_server', _run_server), \
mock.patch(
'swift.common.wsgi.systemd_notify') as mock_notify, \
mock.patch('swift.common.utils.eventlet') as _utils_evt:
wsgi.run_wsgi('conf_file', 'app_section',
global_conf_callback=_global_conf_callback)
@ -986,6 +988,9 @@ class TestWSGI(unittest.TestCase, ConfigAssertMixin):
socket=True,
select=True,
thread=True)
self.assertEqual(mock_notify.mock_calls, [
mock.call('logger', "STOPPING=1"),
])
def test_run_server_success(self):
calls = defaultdict(int)
@ -1008,6 +1013,8 @@ class TestWSGI(unittest.TestCase, ConfigAssertMixin):
mock.patch.object(wsgi, 'loadapp', _loadapp), \
mock.patch.object(wsgi, 'capture_stdio'), \
mock.patch.object(wsgi, 'run_server'), \
mock.patch(
'swift.common.wsgi.systemd_notify') as mock_notify, \
mock.patch('swift.common.utils.eventlet') as _utils_evt:
rc = wsgi.run_wsgi('conf_file', 'app_section')
self.assertEqual(calls['_initrp'], 1)
@ -1017,6 +1024,9 @@ class TestWSGI(unittest.TestCase, ConfigAssertMixin):
socket=True,
select=True,
thread=True)
self.assertEqual(mock_notify.mock_calls, [
mock.call('logger', "STOPPING=1"),
])
# run_wsgi() no longer calls drop_privileges() in the parent process,
# just clean_up_daemon_hygene()
self.assertEqual([], _d_privs.mock_calls)