restart: don't stop process on sighup when mutating

It seems that the code for handling SIGHUP currently calls stop()
on the service, then calls reset(), then calls start() on it again.

This is effectively a full service restart, which breaks the whole
point behind using SIGHUP for hot and quick reloads.  It also breaks
our downstream projects in a few ways where they lose RPC on reload
due to the fact that they don't expect to have stop() called on a
reset().

This patch removes the stop and start when the restart_method is
set to 'mutate' because in that case we should just be signaling
the service to check for changes in its mutable config options.
It also changes the signal sent to children in that case to
SIGHUP, since SIGTERM will cause unnecessary restarts of child
processes.

The previous behavior is maintained for the 'reload' restart_method
since that does a complete reload of the service config, which is
not safe to do without restarting the service completely.

Change-Id: I86a34c22d41d87a9cce2d4ac6d95562d05823ecf
Closes-Bug: #1794708
Co-Authored-By: Ben Nemec <bnemec@redhat.com>
This commit is contained in:
Mohammed Naser 2019-03-07 22:09:49 -05:00 committed by Eric Fried
parent ca6f839fc4
commit e7dd291689
2 changed files with 35 additions and 16 deletions

View File

@ -143,10 +143,15 @@ requests have been fully served.
To force instantaneous termination SIGINT signal must be sent.
On receiving SIGHUP configuration files are reloaded and a service
is being reset and started again. Then all child workers are gracefully
stopped using SIGTERM and workers with new configuration are
spawned. Thus, SIGHUP can be used for changing config options on the go.
The behavior on receiving SIGHUP varies based on how the service is configured.
If the launcher uses the ``reload`` restart_method (the default), then the
service will reload its configuration and any threads will be completely
restarted. If the ``mutate`` restart_method is used, then only the
configuration options marked as mutable will be reloaded and the service
threads will not be restarted.
See :py:class:`oslo_service.service.Launcher` for more details on the
``restart_method`` parameter.
*NOTE:* SIGHUP is not supported on Windows.
*NOTE:* Config option graceful_shutdown_timeout is not supported on Windows.

View File

@ -261,12 +261,10 @@ class Launcher(object):
"""
self.conf = conf
conf.register_opts(_options.service_opts)
self.services = Services()
self.services = Services(restart_method=restart_method)
self.backdoor_port = (
eventlet_backdoor.initialize_if_enabled(self.conf))
self.restart_method = restart_method
if restart_method not in _LAUNCHER_RESTART_METHODS:
raise ValueError(_("Invalid restart_method: %s") % restart_method)
def launch_service(self, service, workers=1):
"""Load and start the given service.
@ -309,7 +307,7 @@ class Launcher(object):
"""
if self.restart_method == 'reload':
self.conf.reload_config_files()
elif self.restart_method == 'mutate':
else: # self.restart_method == 'mutate'
self.conf.mutate_config_files()
self.services.restart()
@ -372,7 +370,7 @@ class ServiceLauncher(Launcher):
super(ServiceLauncher, self).wait()
except SignalExit as exc:
signame = self.signal_handler.signals_to_name[exc.signo]
LOG.info('Caught %s, exiting', signame)
LOG.info('Caught %s, handling', signame)
status = exc.code
signo = exc.signo
except SystemExit as exc:
@ -518,7 +516,7 @@ class ProcessLauncher(object):
launcher.wait()
except SignalExit as exc:
signame = self.signal_handler.signals_to_name[exc.signo]
LOG.info('Child caught %s, exiting', signame)
LOG.info('Child caught %s, handling', signame)
status = exc.code
signo = exc.signo
except SystemExit as exc:
@ -668,16 +666,18 @@ class ProcessLauncher(object):
if not _is_sighup_and_daemon(self.sigcaught):
break
child_signal = signal.SIGTERM
if self.restart_method == 'reload':
self.conf.reload_config_files()
elif self.restart_method == 'mutate':
self.conf.mutate_config_files()
child_signal = signal.SIGHUP
for service in set(
[wrap.service for wrap in self.children.values()]):
service.reset()
for pid in self.children:
os.kill(pid, signal.SIGTERM)
os.kill(pid, child_signal)
self.running = True
self.sigcaught = None
@ -743,7 +743,10 @@ class Service(ServiceBase):
class Services(object):
def __init__(self):
def __init__(self, restart_method='reload'):
if restart_method not in _LAUNCHER_RESTART_METHODS:
raise ValueError(_("Invalid restart_method: %s") % restart_method)
self.restart_method = restart_method
self.services = []
self.tg = threadgroup.ThreadGroup()
self.done = event.Event()
@ -776,12 +779,23 @@ class Services(object):
self.tg.wait()
def restart(self):
"""Reset services and start them in new threads."""
self.stop()
self.done = event.Event()
"""Reset services.
The behavior of this function varies depending on the value of the
restart_method member. If the restart_method is `reload`, then it
will stop the services, reset them, and start them in new threads.
If the restart_method is `mutate`, then it will just reset the
services without restarting them.
"""
if self.restart_method == 'reload':
self.stop()
self.done = event.Event()
for restart_service in self.services:
restart_service.reset()
self.tg.add_thread(self.run_service, restart_service, self.done)
if self.restart_method == 'reload':
self.tg.add_thread(self.run_service,
restart_service,
self.done)
@staticmethod
def run_service(service, done):