Allow rootwrap-daemon to timeout and exit

If the client exits abnormally, its rootwrap daemon never receives a
shutdown message and is left running forever. Let the daemon time out
and exit after a period of inactivity to handle such cases.

Change-Id: I783717b5fa019371747b98bf92965b6e689603f6
Related-bug: #1658973
Related-bug: #1658977
Related-bug: #1663458
This commit is contained in:
IWAMOTO Toshihiro 2017-02-28 15:12:01 +09:00
parent 0fa59b04e8
commit 6285b63572
4 changed files with 59 additions and 1 deletions

View File

@ -25,3 +25,6 @@ syslog_log_facility=syslog
# INFO means log all usage # INFO means log all usage
# ERROR means only log unsuccessful attempts # ERROR means only log unsuccessful attempts
syslog_log_level=ERROR syslog_log_level=ERROR
# Rootwrap daemon exits after this many seconds of inactivity
daemon_timeout=600

View File

@ -26,6 +26,7 @@ import stat
import sys import sys
import tempfile import tempfile
import threading import threading
import time
from oslo_rootwrap import cmd from oslo_rootwrap import cmd
from oslo_rootwrap import jsonrpc from oslo_rootwrap import jsonrpc
@ -44,8 +45,11 @@ class RootwrapClass(object):
def __init__(self, config, filters): def __init__(self, config, filters):
self.config = config self.config = config
self.filters = filters self.filters = filters
self.reset_timer()
self.prepare_timer(config)
def run_one_command(self, userargs, stdin=None): def run_one_command(self, userargs, stdin=None):
self.reset_timer()
try: try:
obj = wrapper.start_subprocess( obj = wrapper.start_subprocess(
self.filters, userargs, self.filters, userargs,
@ -73,7 +77,40 @@ class RootwrapClass(object):
err = os.fsdecode(err) err = os.fsdecode(err)
return obj.returncode, out, err return obj.returncode, out, err
def shutdown(self): @classmethod
def reset_timer(cls):
    """Record the current wall-clock time as the moment of last activity.

    The timestamp is stored on the class itself as ``last_called``.
    """
    now = time.time()
    cls.last_called = now
@classmethod
def cancel_timer(cls):
    """Cancel the pending inactivity timer, if one exists.

    Safe to call even when no timer was ever created (the ``timeout``
    attribute is only set once a timer has been prepared), in which case
    this is a no-op.
    """
    timer = getattr(cls, 'timeout', None)
    if timer is None:
        # Nothing was ever scheduled, so there is nothing to cancel.
        return
    try:
        timer.cancel()
    except RuntimeError:
        # Best-effort cancellation, as before: a timer that cannot be
        # cancelled must not prevent the caller from shutting down.
        pass
@classmethod
def prepare_timer(cls, config=None):
    """Arm the inactivity timer unless one is already set.

    :param config: optional object whose ``daemon_timeout`` attribute
                   (in seconds) is stored on the class before the delay
                   is computed.
    """
    if config is not None:
        cls.daemon_timeout = config.daemon_timeout
    # Seconds left until the idle deadline, never negative. One extra
    # second is added so rounding errors cannot fire the timer early.
    remaining = cls.last_called + cls.daemon_timeout - time.time()
    delay = max(remaining, 0) + 1
    if getattr(cls, 'timeout', None):
        # Another timer is already initialized
        return
    timer = threading.Timer(delay, cls.handle_timeout)
    cls.timeout = timer
    timer.start()
@classmethod
def handle_timeout(cls):
    """Timer callback: shut down when idle too long, otherwise re-arm.

    If the last recorded activity is older than ``daemon_timeout``
    seconds, ``shutdown()`` is called. Otherwise a new timer is scheduled
    for the remaining idle period.
    """
    if cls.last_called < time.time() - cls.daemon_timeout:
        cls.shutdown()
        # Shutdown has been requested; do not schedule another timer
        # thread that would only have to be cancelled again.
        return
    # The timer that invoked this callback has already fired, but
    # prepare_timer() declines to create a new one while ``cls.timeout``
    # is still set. Clear it first so the timer is actually re-armed.
    cls.timeout = None
    cls.prepare_timer()
@staticmethod
def shutdown():
# Suicide to force break of the main thread # Suicide to force break of the main thread
os.kill(os.getpid(), signal.SIGINT) os.kill(os.getpid(), signal.SIGINT)
@ -144,6 +181,7 @@ def daemon_start(config, filters):
except Exception: except Exception:
# Most likely the socket have already been closed # Most likely the socket have already been closed
LOG.debug("Failed to close connection") LOG.debug("Failed to close connection")
RootwrapClass.cancel_timer()
LOG.info("Waiting for all client threads to finish.") LOG.info("Waiting for all client threads to finish.")
for thread in threading.enumerate(): for thread in threading.enumerate():
if thread.daemon: if thread.daemon:

View File

@ -22,6 +22,7 @@ import shutil
import signal import signal
import sys import sys
import threading import threading
import time
try: try:
import eventlet import eventlet
@ -52,6 +53,7 @@ class _FunctionalBase(object):
with open(self.config_file, 'w') as f: with open(self.config_file, 'w') as f:
f.write("""[DEFAULT] f.write("""[DEFAULT]
filters_path=%s filters_path=%s
daemon_timeout=10
exec_dirs=/bin""" % (filters_dir,)) exec_dirs=/bin""" % (filters_dir,))
with open(filters_file, 'w') as f: with open(filters_file, 'w') as f:
f.write("""[Filters] f.write("""[Filters]
@ -212,6 +214,15 @@ class RootwrapDaemonTest(_FunctionalBase, testtools.TestCase):
# Expect client to successfully restart daemon and run simple request # Expect client to successfully restart daemon and run simple request
self.test_run_once() self.test_run_once()
def test_daemon_timeout(self):
    """An idle daemon exits, so the next request makes the client restart it."""
    # Let the client start a daemon
    self.execute(['echo'])
    # Make daemon timeout
    with mock.patch.object(self.client, '_restart') as restart:
        # Sleep past the daemon_timeout (10 seconds) set in the test config.
        time.sleep(15)
        self.execute(['echo'])
        # Check call_count explicitly: Mock.assert_called_once() is missing
        # from older mock releases, where it may pass without checking.
        self.assertEqual(1, restart.call_count)
def _exec_thread(self, fifo_path): def _exec_thread(self, fifo_path):
try: try:
# Run a shell script that signals calling process through FIFO and # Run a shell script that signals calling process through FIFO and

View File

@ -91,6 +91,12 @@ class RootwrapConfig(object):
else: else:
self.use_syslog = False self.use_syslog = False
# daemon_timeout
if config.has_option("DEFAULT", "daemon_timeout"):
self.daemon_timeout = int(config.get("DEFAULT", "daemon_timeout"))
else:
self.daemon_timeout = 600
def setup_syslog(execname, facility, level): def setup_syslog(execname, facility, level):
rootwrap_logger = logging.getLogger() rootwrap_logger = logging.getLogger()