def stack_dump_handler(signum, frame):
    """Signal handler that logs the current stack of every thread.

    Installed for SIGUSR2.  The dump is written as a single DEBUG record
    to the "nodepool.stack_dump" logger: for each thread a
    "Thread: <name> (<ident>)" header followed by its formatted stack.
    Frames with no matching ``threading.Thread`` object are labelled
    'Unknown'.

    :param signum: number of the delivered signal (unused).
    :param frame: frame that was interrupted by the signal (unused).
    """
    # Ignore further SIGUSR2s while a dump is in progress so that a
    # second signal cannot re-enter this handler half way through.
    signal.signal(signal.SIGUSR2, signal.SIG_IGN)
    log = logging.getLogger("nodepool.stack_dump")
    try:
        # Map thread idents to Thread objects so frames can be named.
        threads = {t.ident: t for t in threading.enumerate()}
        chunks = []
        for thread_id, stack_frame in sys._current_frames().items():
            thread = threads.get(thread_id)
            thread_name = thread.name if thread else 'Unknown'
            label = '%s (%s)' % (thread_name, thread_id)
            chunks.append("Thread: %s\n" % label)
            chunks.extend(traceback.format_stack(stack_frame))
        log.debug("".join(chunks))
    except Exception:
        # An exception escaping a signal handler is raised in the main
        # thread at whatever point it was interrupted; a diagnostic aid
        # must not disturb the daemon, so report the failure instead.
        log.exception("Unable to dump thread stacks")
    finally:
        # Always re-arm, otherwise one failed dump would leave SIGUSR2
        # ignored for the remaining lifetime of the process.
        signal.signal(signal.SIGUSR2, stack_dump_handler)
@@ class NodeCompleteThread(threading.Thread): log = logging.getLogger("nodepool.NodeCompleteThread") def __init__(self, nodepool, nodename, jobname, result): - threading.Thread.__init__(self) + threading.Thread.__init__(self, name='NodeCompleteThread for %s' % nodename) self.nodename = nodename self.nodepool = nodepool self.jobname = jobname @@ -104,7 +104,7 @@ class NodeUpdateListener(threading.Thread): log = logging.getLogger("nodepool.NodeUpdateListener") def __init__(self, nodepool, addr): - threading.Thread.__init__(self) + threading.Thread.__init__(self, name='NodeUpdateListener') self.nodepool = nodepool self.socket = self.nodepool.zmq_context.socket(zmq.SUB) event_filter = b"" @@ -218,7 +218,7 @@ class NodeLauncher(threading.Thread): log = logging.getLogger("nodepool.NodeLauncher") def __init__(self, nodepool, provider, image, target, node_id, timeout): - threading.Thread.__init__(self) + threading.Thread.__init__(self, name='NodeLauncher for %s' % node_id) self.provider = provider self.image = image self.target = target @@ -352,7 +352,7 @@ class ImageUpdater(threading.Thread): log = logging.getLogger("nodepool.ImageUpdater") def __init__(self, nodepool, provider, image, snap_image_id): - threading.Thread.__init__(self) + threading.Thread.__init__(self, name='ImageUpdater for %s' % snap_image_id) self.provider = provider self.image = image self.snap_image_id = snap_image_id @@ -543,7 +543,7 @@ class NodePool(threading.Thread): log = logging.getLogger("nodepool.NodePool") def __init__(self, configfile): - threading.Thread.__init__(self) + threading.Thread.__init__(self, name='NodePool') self.configfile = configfile self._stopped = False self.config = None diff --git a/nodepool/task_manager.py b/nodepool/task_manager.py index dc6d7c26e..fb6db3b11 100644 --- a/nodepool/task_manager.py +++ b/nodepool/task_manager.py @@ -57,7 +57,7 @@ class TaskManager(threading.Thread): log = logging.getLogger("nodepool.ProviderManager") def __init__(self, client, name, rate): 
- super(TaskManager, self).__init__() + super(TaskManager, self).__init__(name=name) self.daemon = True self.queue = Queue.Queue() self._running = True