From 220edc1ccc6a0bc3dfb94a92946bf2b9a6cc0c61 Mon Sep 17 00:00:00 2001 From: Chris Friesen Date: Thu, 7 Apr 2016 11:16:19 -0600 Subject: [PATCH] Notification of death of arbitrary processes Note: this commit was copied from Titanium Cloud Rel2 This exposes a new feature which may be called to request notification when an arbitrary process changes state. The caller specifies a pid, signal number, and event mask, and when that pid dies, or is stopped, or anything else that would normally cause a SIGCHLD, the kernel will send the specified signal to the caller if the event is in the event mask originally passed down. The siginfo_t struct will contain the same information as would be included with SIGCHLD. This is exposed to userspace via the prctl() call with the PR_DO_NOTIFY_TASK_STATE option. Signed-off-by: Jim Somerville --- include/linux/init_task.h | 9 ++ include/linux/sched.h | 6 ++ include/uapi/linux/prctl.h | 18 ++++ init/Kconfig | 15 +++ kernel/Makefile | 1 + kernel/death_notify.c | 227 +++++++++++++++++++++++++++++++++++++++++++++ kernel/death_notify.h | 45 +++++++++ kernel/exit.c | 6 ++ kernel/fork.c | 4 + kernel/signal.c | 11 +++ kernel/sys.c | 9 ++ 11 files changed, 351 insertions(+) create mode 100644 kernel/death_notify.c create mode 100644 kernel/death_notify.h diff --git a/include/linux/init_task.h b/include/linux/init_task.h index d8c82e0..ba0c12e 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -77,6 +77,14 @@ extern struct nsproxy init_nsproxy; .signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(sighand.signalfd_wqh), \ } +#ifdef CONFIG_SIGEXIT +#define INIT_SIGEXIT(tsk) \ + .notify = LIST_HEAD_INIT(tsk.notify), \ + .monitor = LIST_HEAD_INIT(tsk.monitor), +#else +#define INIT_SIGEXIT(tsk) +#endif + extern struct group_info init_groups; #define INIT_STRUCT_PID { \ @@ -231,6 +239,7 @@ extern struct task_group root_task_group; .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ .journal_info = NULL, \ .cpu_timers = 
INIT_CPU_TIMERS(tsk.cpu_timers), \ + INIT_SIGEXIT(tsk) \ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ .timer_slack_ns = 50000, /* 50 usec default slack */ \ INIT_TIMER_LIST \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 97ff026..0785453 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1686,6 +1686,12 @@ struct task_struct { short il_next; short pref_node_fork; #endif +#ifdef CONFIG_SIGEXIT + /* list of processes to notify on death */ + struct list_head notify; + /* list of outstanding monitor requests */ + struct list_head monitor; +#endif #ifdef CONFIG_NUMA_BALANCING int numa_scan_seq; unsigned int numa_scan_period; diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index a817b5c..2cf7776 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -55,6 +55,24 @@ #define PR_SET_NAME 15 /* Set process name */ #define PR_GET_NAME 16 /* Get process name */ +#ifdef CONFIG_SIGEXIT +#define PR_DO_NOTIFY_TASK_STATE 17 /* Set/get notification for task + state changes */ + +/* This is the data structure for requesting process death + * (and other state change) information. Sig of -1 means + * query, sig of 0 means deregistration, positive sig means + * that you want to set it. sig and events are value-result + * and will be updated with the previous values on every + * successful call. + */ +struct task_state_notify_info { + pid_t pid; + int sig; + unsigned int events; +}; +#endif + /* Get/set process endian */ #define PR_GET_ENDIAN 19 #define PR_SET_ENDIAN 20 diff --git a/init/Kconfig b/init/Kconfig index 1d645a1..37e48c0 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1581,6 +1581,21 @@ config VM_EVENT_COUNTERS on EXPERT systems. /proc/vmstat will only show page counts if VM event counters are disabled.
+config SIGEXIT + bool "Notification of death of arbitrary processes" + default n + help + When enabled this exposes a new feature which may be called to request + notification when an arbitrary process changes state. The caller specifies + a pid, signal number, and event mask, and when that pid dies, or is + stopped, or anything else that would normally cause a SIGCHLD, the + kernel will send the specified signal to the caller if the event is in + the event mask originally passed down. The siginfo_t struct will + contain the same information as would be included with SIGCHLD. + + This is exposed to userspace via the prctl() + call with the PR_DO_NOTIFY_TASK_STATE option + config SLUB_DEBUG default y bool "Enable SLUB debugging support" if EXPERT diff --git a/kernel/Makefile b/kernel/Makefile index 762218c..d357e7d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -120,6 +120,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ +obj-$(CONFIG_SIGEXIT) += death_notify.o obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/kernel/death_notify.c b/kernel/death_notify.c new file mode 100644 index 0000000..5eb8bfc --- /dev/null +++ b/kernel/death_notify.c @@ -0,0 +1,227 @@ +/* + * kernel/death_notify.c, Process death notification support + * + * Copyright (c) 2006-2014 Wind River Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/errno.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/prctl.h> + +#include "death_notify.h" + +static void unlink_status_notifier(struct signotifier *n) +{ + list_del(&n->monitor_list); + list_del(&n->notify_list); + kfree(n); +} + +static void handle_already_monitoring(struct signotifier *node, + struct task_state_notify_info *args, + struct task_state_notify_info *oldargs) +{ + /* Store the old values */ + oldargs->sig = node->sig; + oldargs->events = node->events; + + /* args->sig is -1 (query), 0, or a valid signal; -1 changes nothing. */ + if (args->sig > 0) { + /* Update the new values */ + node->sig = args->sig; + node->events = args->events; + } else if (!args->sig) { + /* args->sig of 0 means to deregister */ + unlink_status_notifier(node); + } +} + +static void setup_new_node(struct task_struct *p, + struct signotifier *node, + struct task_state_notify_info *args) +{ + node->notify_tsk = current; + node->sig = args->sig; + node->events = args->events; + + /* Add this node to the list of notification requests + * for the specified process. + */ + list_add_tail(&node->notify_list, &p->notify); + + /* Also add this node to the list of monitor requests + * for the current process. + */ + list_add_tail(&node->monitor_list, &current->monitor); +} + + +/* Returns 0 if arguments are valid, 1 if they are not. */ +static int invalid_args(struct task_state_notify_info *args) +{ + int ret = 1; + + if (args->pid <= 0) + goto out; + + /* Sig of -1 implies query, sig of 0 implies deregistration. + * Otherwise sig must be positive and within range. + */ + if ((args->sig < -1) || (args->sig > _NSIG)) + goto out; + + /* If positive sig, must have valid events.
*/ + if (args->sig > 0) { + if (!args->events || (args->events >= (1 << (NSIGCHLD+1)))) + goto out; + } + + ret = 0; +out: + return ret; +} + +/* Notify those registered for process state updates via do_notify_task_state(). + * Nodes are only read here; they are freed on deregistration or by + * release_notify_others(). + * + * Note: we only notify processes for events in which they have registered + * interest. + * + * Must be called holding a lock on tasklist_lock. + */ +void do_notify_others(struct task_struct *tsk, struct siginfo *info) +{ + struct signotifier *node; + unsigned int events; + + /* This method of generating the event bit must be matched in the + * userspace library. Codes of 32 and up have no bit to shift into. + */ + events = (info->si_code & 0xE0) ? 0 : 1U << (info->si_code & 0x1F); + + list_for_each_entry(node, &tsk->notify, notify_list) { + if (events & node->events) { + info->si_signo = node->sig; + group_send_sig_info(node->sig, info, node->notify_tsk); + } + } +} + +void release_notify_others(struct task_struct *p) +{ + struct signotifier *n, *t; + + /* Need to clean up any outstanding requests where we + * wanted to be notified when others died. + */ + list_for_each_entry_safe(n, t, &p->monitor, monitor_list) { + unlink_status_notifier(n); + } + + /* Also need to clean up any outstanding requests where others + * wanted to be notified when we died. + */ + list_for_each_entry_safe(n, t, &p->notify, notify_list) { + unlink_status_notifier(n); + } +} + +/* If the config is defined, then processes can call this routine + * to request notification when the specified task's state changes. + * On the death (or other state change) of the specified process, + * we will send them the specified signal if the event is listed + * in their event bitfield. + * + * A sig of 0 means that we want to deregister. + * + * The sig/events fields are value/result. On success we update them + * to reflect what they were before the call. + * + * Returns error code on error, on success we return 0.
+ */ +int do_notify_task_state(unsigned long arg) +{ + int err; + struct task_struct *p; + struct signotifier *node, *tmp; + struct task_state_notify_info args, oldargs; + + if (copy_from_user(&args, (struct task_state_notify_info __user *)arg, + sizeof(args))) + return -EFAULT; + oldargs.pid = args.pid; + /* oldargs is copied back even on -ESRCH: never leave it unset. */ + oldargs.sig = 0; + oldargs.events = 0; + + /* Validate the arguments passed in. */ + err = -EINVAL; + if (invalid_args(&args)) + goto out; + + /* We must hold a write lock on tasklist_lock to add the notification + * later on, and we need some lock on tasklist_lock for + * find_task_by_vpid(), so may as well take the write lock now. + * Must use write_lock_irq(). + */ + write_lock_irq(&tasklist_lock); + + err = -ESRCH; + p = find_task_by_vpid(args.pid); + if (!p) + goto unlock_out; + + /* Now we know pid exists, unlikely to fail. */ + err = 0; + + /* Check if we're already monitoring the specified pid. If so, update + * the monitoring parameters and return the old ones. + */ + list_for_each_entry(tmp, &p->notify, notify_list) { + if (tmp->notify_tsk == current) { + handle_already_monitoring(tmp, &args, &oldargs); + goto unlock_out; + } + } + + /* If we get here, we're not currently monitoring the process, so + * oldargs keeps its zeroed sig/events. If we wanted to set up a + * new monitor, do it now. If we didn't manage to allocate memory + * for the new node, then we return an appropriate error. + */ + if (args.sig > 0) { + node = kmalloc(sizeof(*node), GFP_ATOMIC); + if (node) + setup_new_node(p, node, &args); + else + err = -ENOMEM; + } + +unlock_out: + write_unlock_irq(&tasklist_lock); + + /* Copy the old values back to caller.
*/ + if (copy_to_user((struct task_state_notify_info __user *)arg, + &oldargs, sizeof(oldargs))) + err = -EFAULT; + +out: + return err; +} diff --git a/kernel/death_notify.h b/kernel/death_notify.h new file mode 100644 index 0000000..b2b8e8c --- /dev/null +++ b/kernel/death_notify.h @@ -0,0 +1,45 @@ +/* + * kernel/death_notify.h, Process death notification support + * + * Copyright (c) 2006-2014 Wind River Systems, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _KERNEL_DEATH_NOTIFY_H +#define _KERNEL_DEATH_NOTIFY_H + +#ifdef CONFIG_SIGEXIT + +struct signotifier { + struct task_struct *notify_tsk; + struct list_head notify_list; + struct list_head monitor_list; + int sig; + unsigned int events; +}; + +extern int do_notify_task_state(unsigned long arg); +extern void do_notify_others(struct task_struct *tsk, + struct siginfo *info); +extern void release_notify_others(struct task_struct *p); + +#else /* !CONFIG_SIGEXIT */ + +static inline void do_notify_others(struct task_struct *tsk, + struct siginfo *info) {} +static inline void release_notify_others(struct task_struct *p) {} + +#endif /* CONFIG_SIGEXIT */ +#endif diff --git a/kernel/exit.c b/kernel/exit.c index 8a908ea..448a3c3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -59,6 +59,9 @@ #include #include #include +#ifdef CONFIG_SIGEXIT +#include "death_notify.h" +#endif static void
exit_mm(struct task_struct * tsk); @@ -184,6 +187,9 @@ repeat: proc_flush_task(p); tasklist_write_lock_irq(); +#ifdef CONFIG_SIGEXIT + release_notify_others(p); +#endif ptrace_release_task(p); __exit_signal(p); diff --git a/kernel/fork.c b/kernel/fork.c index 6bda4c0..f3cd3ad 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1518,6 +1518,10 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->sequential_io = 0; p->sequential_io_avg = 0; #endif +#ifdef CONFIG_SIGEXIT + INIT_LIST_HEAD(&p->notify); + INIT_LIST_HEAD(&p->monitor); +#endif /* Perform scheduler related setup. Assign this task to a CPU. */ retval = sched_fork(clone_flags, p); diff --git a/kernel/signal.c b/kernel/signal.c index 59e84a2..728daa9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -47,6 +47,9 @@ #include #include #include "audit.h" /* audit_signal_info() */ +#ifdef CONFIG_SIGEXIT +#include "death_notify.h" +#endif /* * SLAB caches for signal bits. @@ -1849,6 +1852,10 @@ bool do_notify_parent(struct task_struct *tsk, int sig) __wake_up_parent(tsk, tsk->parent); spin_unlock_irqrestore(&psig->siglock, flags); +#ifdef CONFIG_SIGEXIT + do_notify_others(tsk, &info); +#endif + return autoreap; } @@ -1920,6 +1927,10 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, */ __wake_up_parent(tsk, parent); spin_unlock_irqrestore(&sighand->siglock, flags); + +#ifdef CONFIG_SIGEXIT + do_notify_others(tsk, &info); +#endif } static inline int may_ptrace_stop(void) diff --git a/kernel/sys.c b/kernel/sys.c index e79ea7e..5d43b93 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -67,6 +67,10 @@ /* Hardening for Spectre-v1 */ #include +#ifdef CONFIG_SIGEXIT +#include "death_notify.h" +#endif + #ifndef SET_UNALIGN_CTL # define SET_UNALIGN_CTL(a,b) (-EINVAL) #endif @@ -2478,6 +2482,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, else error = PR_MCE_KILL_DEFAULT; break; +#ifdef CONFIG_SIGEXIT + case PR_DO_NOTIFY_TASK_STATE: + error = 
do_notify_task_state(arg2); + break; +#endif case PR_SET_MM: error = prctl_set_mm(arg2, arg3, arg4, arg5); break; -- 2.7.4