Add stack dump handler to builder

This allows us to see the running threads and their stack traces.

Change-Id: I1b618ae662730f62c02bdb5f9573a3bb25072c9a
This commit is contained in:
James E. Blair 2016-12-01 08:59:43 -08:00
parent 6c1c19e981
commit 343f9d0b6e
5 changed files with 35 additions and 24 deletions

View File

@ -149,6 +149,7 @@ class CleanupWorker(BaseWorker):
def __init__(self, name, config_path, interval):
    '''
    Initialize the cleanup worker.

    :param name: Value interpolated into both the logger name and
        this worker's ``name`` attribute.
    :param config_path: Forwarded unchanged to the base class.
    :param interval: Forwarded unchanged to the base class.
    '''
    super(CleanupWorker, self).__init__(config_path, interval)

    logger_name = "nodepool.builder.CleanupWorker.%s" % name
    self.log = logging.getLogger(logger_name)

    # The test suite matches on this prefix when deciding which
    # threads to skip (presumably this is the thread name -- confirm
    # against BaseWorker).
    worker_name = 'CleanupWorker.%s' % name
    self.name = worker_name
def _buildUploadRecencyTable(self):
'''
@ -446,6 +447,7 @@ class BuildWorker(BaseWorker):
def __init__(self, name, config_path, interval):
    '''
    Initialize the build worker.

    :param name: Value interpolated into both the logger name and
        this worker's ``name`` attribute.
    :param config_path: Forwarded unchanged to the base class.
    :param interval: Forwarded unchanged to the base class.
    '''
    super(BuildWorker, self).__init__(config_path, interval)

    logger_name = "nodepool.builder.BuildWorker.%s" % name
    self.log = logging.getLogger(logger_name)

    # The test suite matches on this prefix when deciding which
    # threads to skip (presumably this is the thread name -- confirm
    # against BaseWorker).
    worker_name = 'BuildWorker.%s' % name
    self.name = worker_name
def _checkForScheduledImageUpdates(self):
'''
@ -715,6 +717,7 @@ class UploadWorker(BaseWorker):
def __init__(self, name, config_path, interval):
    '''
    Initialize the upload worker.

    :param name: Value interpolated into both the logger name and
        this worker's ``name`` attribute.
    :param config_path: Forwarded unchanged to the base class.
    :param interval: Forwarded unchanged to the base class.
    '''
    super(UploadWorker, self).__init__(config_path, interval)

    logger_name = "nodepool.builder.UploadWorker.%s" % name
    self.log = logging.getLogger(logger_name)

    # The test suite matches on this prefix when deciding which
    # threads to skip (presumably this is the thread name -- confirm
    # against BaseWorker).
    worker_name = 'UploadWorker.%s' % name
    self.name = worker_name
def _uploadImage(self, build_id, upload_id, image_name, images, provider):
'''

View File

@ -17,6 +17,30 @@
import logging
import logging.config
import os
import signal
import sys
import threading
import traceback
def stack_dump_handler(signum, frame):
    """Log the current stack trace of every thread in the process.

    Written to be installed as the SIGUSR2 handler.  The dump is emitted
    as a single DEBUG record on the ``nodepool.stack_dump`` logger, with
    one ``Thread: <name> (<ident>)`` header per thread followed by that
    thread's formatted stack.

    :param signum: Signal number supplied by the signal machinery (unused).
    :param frame: Interrupted frame supplied by the signal machinery
        (unused).
    """
    # Ignore further SIGUSR2s while the dump is being produced so a second
    # signal cannot re-enter this handler part-way through.
    signal.signal(signal.SIGUSR2, signal.SIG_IGN)
    try:
        # Map thread idents to Thread objects so frames can be labelled
        # with a human-readable name.
        threads = {}
        for t in threading.enumerate():
            threads[t.ident] = t

        chunks = []
        for thread_id, stack_frame in sys._current_frames().items():
            thread = threads.get(thread_id)
            if thread:
                thread_name = thread.name
            else:
                # Frame belongs to a thread the threading module does
                # not list.
                thread_name = 'Unknown'
            label = '%s (%s)' % (thread_name, thread_id)
            chunks.append("Thread: %s\n" % label)
            chunks.extend(traceback.format_stack(stack_frame))

        log = logging.getLogger("nodepool.stack_dump")
        log.debug("".join(chunks))
    finally:
        # Always re-arm the handler; without this, any exception above
        # would leave SIGUSR2 ignored for the rest of the process.
        signal.signal(signal.SIGUSR2, stack_dump_handler)
class NodepoolApp(object):

View File

@ -61,6 +61,7 @@ class NodePoolBuilderApp(nodepool.cmd.NodepoolApp):
self.args.upload_workers)
signal.signal(signal.SIGINT, self.sigint_handler)
signal.signal(signal.SIGUSR2, nodepool.cmd.stack_dump_handler)
self.nb.start()
while True:

View File

@ -23,12 +23,9 @@ import extras
# instead it depends on lockfile-0.9.1 which uses pidfile.
pid_file_module = extras.try_imports(['daemon.pidlockfile', 'daemon.pidfile'])
import logging.config
import os
import sys
import signal
import traceback
import threading
import nodepool.builder
import nodepool.cmd
@ -36,26 +33,6 @@ import nodepool.nodepool
import nodepool.webapp
def stack_dump_handler(signum, frame):
    """Log the current stack trace of every thread in the process.

    Written to be installed as the SIGUSR2 handler.  The dump is emitted
    as a single DEBUG record on the ``nodepool.stack_dump`` logger, with
    one ``Thread: <name> (<ident>)`` header per thread followed by that
    thread's formatted stack.

    :param signum: Signal number supplied by the signal machinery (unused).
    :param frame: Interrupted frame supplied by the signal machinery
        (unused).
    """
    # Ignore further SIGUSR2s while the dump is being produced so a second
    # signal cannot re-enter this handler part-way through.
    signal.signal(signal.SIGUSR2, signal.SIG_IGN)
    try:
        # Map thread idents to Thread objects so frames can be labelled
        # with a human-readable name.
        threads = {}
        for t in threading.enumerate():
            threads[t.ident] = t

        chunks = []
        for thread_id, stack_frame in sys._current_frames().items():
            thread = threads.get(thread_id)
            if thread:
                thread_name = thread.name
            else:
                # Frame belongs to a thread the threading module does
                # not list.
                thread_name = 'Unknown'
            label = '%s (%s)' % (thread_name, thread_id)
            chunks.append("Thread: %s\n" % label)
            chunks.extend(traceback.format_stack(stack_frame))

        log = logging.getLogger("nodepool.stack_dump")
        log.debug("".join(chunks))
    finally:
        # Always re-arm the handler; without this, any exception above
        # would leave SIGUSR2 ignored for the rest of the process.
        signal.signal(signal.SIGUSR2, stack_dump_handler)
def is_pidfile_stale(pidfile):
""" Determine whether a PID file is stale.
@ -139,7 +116,7 @@ class NodePoolDaemon(nodepool.cmd.NodepoolApp):
# For back compatibility:
signal.signal(signal.SIGUSR1, self.exit_handler)
signal.signal(signal.SIGUSR2, stack_dump_handler)
signal.signal(signal.SIGUSR2, nodepool.cmd.stack_dump_handler)
signal.signal(signal.SIGTERM, self.term_handler)
self.pool.start()

View File

@ -292,6 +292,12 @@ class BaseTestCase(testtools.TestCase):
if t.name.startswith("worker "):
# paste web server
continue
if t.name.startswith("UploadWorker"):
continue
if t.name.startswith("BuildWorker"):
continue
if t.name.startswith("CleanupWorker"):
continue
if t.name not in whitelist:
done = False
if done: