[PATCH] pid_ns: Fix proc_flush_task() accessing freed proc_mnt

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Louis Rilling
Date: Thursday, June 24, 2010 - 1:37 am

On 06/19, Oleg Nesterov wrote:

It's completely untested and could be split into three patches. But I think that
it solves the issues found so far, and that it will work with Eric's
unshare(CLONE_NEWPID) too.

What do you think about this approach?

Thanks,

Louis

On 20/06/10 20:06 +0200, Oleg Nesterov wrote:

This patch does four things:
- defer pid_ns_release_proc()->mntput() to a workqueue context, so that
  pid_ns_release_proc() can be called in atomic context;
- introduce pid_ns->nr_pids, so that we can count the number of pids
  allocated by alloc_pidmap();
- move the call to pid_ns_prepare_proc() to alloc_pid(), where we know
  when the first pid of a namespace is allocated;
- move the call to pid_ns_release_proc() to free_pid(), where we are now
  able to know when the last pid of a namespace is detached.

This solves the missing mntput() in copy_process() cleanup path, since
free_pid() is called to clean up after alloc_pid().

This solves the issue of a multi-threaded init doing exec, since all
sub-threads, including the former leader, have called proc_flush_task() by
the time the last pid is detached.

This solves the EXIT_DEAD tasks issue for the same reason.

Signed-off-by: Louis Rilling <louis.rilling@kerlabs.com>
---
 fs/proc/base.c                |    4 ----
 fs/proc/root.c                |   11 ++++++++++-
 include/linux/pid_namespace.h |    3 +++
 kernel/fork.c                 |    6 ------
 kernel/pid.c                  |   16 +++++++++++++---
 kernel/pid_namespace.c        |    1 +
 6 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index acb7ef8..455b109 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2745,10 +2745,6 @@ void proc_flush_task(struct task_struct *task)
 		proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr,
 					tgid->numbers[i].nr);
 	}
-
-	upid = &pid->numbers[pid->level];
-	if (upid->nr == 1)
-		pid_ns_release_proc(upid->ns);
 }
 
 static struct dentry *proc_pid_instantiate(struct inode *dir,
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 4258384..9876cd9 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -215,7 +215,16 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
 	return 0;
 }
 
-void pid_ns_release_proc(struct pid_namespace *ns)
+static void do_pid_ns_release_proc(struct work_struct *work)
 {
+	struct pid_namespace *ns;
+
+	ns = container_of(work, struct pid_namespace, release_proc_work);
 	mntput(ns->proc_mnt);
 }
+
+void pid_ns_release_proc(struct pid_namespace *ns)
+{
+	INIT_WORK(&ns->release_proc_work, do_pid_ns_release_proc);
+	schedule_work(&ns->release_proc_work);
+}
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 38d1032..1010733 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -4,6 +4,7 @@
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/threads.h>
+#include <linux/workqueue.h>
 #include <linux/nsproxy.h>
 #include <linux/kref.h>
 
@@ -19,6 +20,7 @@ struct bsd_acct_struct;
 struct pid_namespace {
 	struct kref kref;
 	struct pidmap pidmap[PIDMAP_ENTRIES];
+	atomic_t nr_pids;
 	int last_pid;
 	struct task_struct *child_reaper;
 	struct kmem_cache *pid_cachep;
@@ -26,6 +28,7 @@ struct pid_namespace {
 	struct pid_namespace *parent;
 #ifdef CONFIG_PROC_FS
 	struct vfsmount *proc_mnt;
+	struct work_struct release_proc_work;
 #endif
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	struct bsd_acct_struct *bacct;
diff --git a/kernel/fork.c b/kernel/fork.c
index b6cce14..b063a9c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1154,12 +1154,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		pid = alloc_pid(p->nsproxy->pid_ns);
 		if (!pid)
 			goto bad_fork_cleanup_io;
-
-		if (clone_flags & CLONE_NEWPID) {
-			retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
-			if (retval < 0)
-				goto bad_fork_free_pid;
-		}
 	}
 
 	p->pid = pid_nr(pid);
diff --git a/kernel/pid.c b/kernel/pid.c
index e9fd8c1..fdb73e1 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -34,6 +34,7 @@
 #include <linux/bootmem.h>
 #include <linux/hash.h>
 #include <linux/pid_namespace.h>
+#include <linux/proc_fs.h>
 #include <linux/init_task.h>
 #include <linux/syscalls.h>
 
@@ -112,7 +113,7 @@ EXPORT_SYMBOL(is_container_init);
 
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
-static void free_pidmap(struct upid *upid)
+static bool free_pidmap(struct upid *upid)
 {
 	int nr = upid->nr;
 	struct pidmap *map = upid->ns->pidmap + nr / BITS_PER_PAGE;
@@ -120,6 +121,7 @@ static void free_pidmap(struct upid *upid)
 
 	clear_bit(offset, map->page);
 	atomic_inc(&map->nr_free);
+	return atomic_dec_and_test(&upid->ns->nr_pids);
 }
 
 static int alloc_pidmap(struct pid_namespace *pid_ns)
@@ -154,6 +156,7 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
 			do {
 				if (!test_and_set_bit(offset, map->page)) {
 					atomic_dec(&map->nr_free);
+					atomic_inc(&pid_ns->nr_pids);
 					pid_ns->last_pid = pid;
 					return pid;
 				}
@@ -226,6 +229,7 @@ static void delayed_put_pid(struct rcu_head *rhp)
 void free_pid(struct pid *pid)
 {
 	/* We can be called with write_lock_irq(&tasklist_lock) held */
+	struct upid *upid;
 	int i;
 	unsigned long flags;
 
@@ -234,8 +238,11 @@ void free_pid(struct pid *pid)
 		hlist_del_rcu(&pid->numbers[i].pid_chain);
 	spin_unlock_irqrestore(&pidmap_lock, flags);
 
-	for (i = 0; i <= pid->level; i++)
-		free_pidmap(pid->numbers + i);
+	for (i = 0; i <= pid->level; i++) {
+		upid = pid->numbers + i;
+		if (free_pidmap(upid))
+			pid_ns_release_proc(upid->ns);
+	}
 
 	call_rcu(&pid->rcu, delayed_put_pid);
 }
@@ -276,6 +283,9 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 				&pid_hash[pid_hashfn(upid->nr, upid->ns)]);
 	spin_unlock_irq(&pidmap_lock);
 
+	if (pid->numbers[pid->level].nr == 1)
+		pid_ns_prepare_proc(ns);
+
 out:
 	return pid;
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a5aff94..beba2b4 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -92,6 +92,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
 
 	set_bit(0, ns->pidmap[0].page);
 	atomic_set(&ns->pidmap[0].nr_free, BITS_PER_PAGE - 1);
+	atomic_set(&ns->nr_pids, 0);
 
 	for (i = 1; i < PIDMAP_ENTRIES; i++)
 		atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
-- 
1.5.6.5

--
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
[PATCH 0/4] pid_ns_prepare_proc/unshare cleanups, Oleg Nesterov, (Sat Jun 19, 12:08 pm)
[PATCH 2/4] procfs: kill the global proc_mnt variable, Oleg Nesterov, (Sat Jun 19, 12:10 pm)
[PATCH 0/6] Unshare support for the pid namespace., Eric W. Biederman, (Sun Jun 20, 1:42 am)
[PATCH 1/6] pid: Remove the child_reaper special case in ..., Eric W. Biederman, (Sun Jun 20, 1:44 am)
[PATCH 2/6] pidns: Call pid_ns_prepare_proc from create_pi ..., Eric W. Biederman, (Sun Jun 20, 1:45 am)
[PATCH 3/6] procfs: kill the global proc_mnt variable, Eric W. Biederman, (Sun Jun 20, 1:45 am)
[PATCH 4/6] pidns: Don't allow new pids after the namespac ..., Eric W. Biederman, (Sun Jun 20, 1:47 am)
[PATCH 5/6] pidns: Use task_active_pid_ns where appropriate, Eric W. Biederman, (Sun Jun 20, 1:48 am)
[PATCH 6/6] pidns: Support unsharing the pid namespace., Eric W. Biederman, (Sun Jun 20, 1:49 am)
Re: [PATCH 0/6] Unshare support for the pid namespace., Oleg Nesterov, (Sun Jun 20, 11:03 am)
[PATCH 0/2] pid_ns_release_proc() fixes, Oleg Nesterov, (Sun Jun 20, 11:05 am)
Re: [PATCH 0/6] Unshare support for the pid namespace., Eric W. Biederman, (Sun Jun 20, 2:00 pm)
Re: [PATCH 0/6] Unshare support for the pid namespace., Oleg Nesterov, (Sun Jun 20, 2:48 pm)
Re: [PATCH 0/6] Unshare support for the pid namespace., Oleg Nesterov, (Sun Jun 20, 2:56 pm)
Re: [PATCH 6/6] pidns: Support unsharing the pid namespace., Eric W. Biederman, (Sun Jun 20, 6:53 pm)
Re: [PATCH 1/1] pid_ns: move pid_ns_release_proc() from pr ..., Sukadev Bhattiprolu, (Wed Jun 23, 11:36 pm)
Re: [PATCH 1/1] pid_ns: move pid_ns_release_proc() from pr ..., Eric W. Biederman, (Thu Jun 24, 12:06 am)
[PATCH] pid_ns: Fix proc_flush_task() accessing freed proc_mnt, Louis Rilling, (Thu Jun 24, 1:37 am)
Re: [RESEND PATCH] pid_ns: Fix proc_flush_task() accessing ..., Sukadev Bhattiprolu, (Fri Jun 25, 11:37 am)
Re: [RESEND PATCH] pid_ns: Fix proc_flush_task() accessing ..., Sukadev Bhattiprolu, (Fri Jun 25, 2:26 pm)
Re: [RESEND PATCH] pid_ns: Fix proc_flush_task() accessing ..., Sukadev Bhattiprolu, (Fri Jun 25, 3:07 pm)
[PATCH 01/24] pidns: Remove races by stopping the caching ..., Eric W. Biederman, (Fri Jul 9, 8:58 am)
Re: [PATCH 01/24] pidns: Remove races by stopping the cach ..., Eric W. Biederman, (Sun Jul 11, 7:25 am)
[PATCH] pidns: Fix wait for zombies to be reaped in zap_pi ..., Eric W. Biederman, (Mon Jul 12, 11:09 am)
Re: [PATCH] pidns: Fix wait for zombies to be reaped in za ..., Eric W. Biederman, (Tue Jul 13, 6:47 pm)
Re: [PATCH] pidns: Fix wait for zombies to be reaped in za ..., Sukadev Bhattiprolu, (Wed Jul 14, 1:53 pm)
Re: [PATCH] pidns: Fix wait for zombies to be reaped in za ..., Eric W. Biederman, (Wed Jul 14, 2:35 pm)
Re: [PATCH] pidns: Fix wait for zombies to be reaped in za ..., Sukadev Bhattiprolu, (Sat Oct 30, 12:07 am)