cgroups: fix probable race with put_css_set[_taskexit] and find_css_set

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Linux Kernel Mailing List
Date: Monday, October 20, 2008 - 10:03 am

Gitweb:     http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=146aa1...
Commit:     146aa1bd0511f88ddb4e92fafa2b8aad4f2f65f3
Parent:     248736c2a57206388c86f8cdd3392ee986e84f9f
Author:     Lai Jiangshan <laijs@cn.fujitsu.com>
AuthorDate: Sat Oct 18 20:28:03 2008 -0700
Committer:  Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: Mon Oct 20 08:52:38 2008 -0700

    cgroups: fix probable race with put_css_set[_taskexit] and find_css_set
    
    put_css_set_taskexit may be called when find_css_set is called on other
    cpu.  And the race will occur:
    
    put_css_set_taskexit side                    find_css_set side
    
                                            |
    atomic_dec_and_test(&kref->refcount)    |
        /* kref->refcount = 0 */            |
    ....................................................................
                                            |  read_lock(&css_set_lock)
                                            |  find_existing_css_set
                                            |  get_css_set
                                            |  read_unlock(&css_set_lock);
    ....................................................................
    __release_css_set                       |
    ....................................................................
                                            | /* use a released css_set */
                                            |
    
    [put_css_set is the same. But in the current code, all put_css_set are
    put into cgroup mutex critical region as the same as find_css_set.]
    
    [akpm@linux-foundation.org: repair comments]
    [menage@google.com: eliminate race in css_set refcounting]
    Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
    Cc: Balbir Singh <balbir@in.ibm.com>
    Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
    Signed-off-by: Paul Menage <menage@google.com>
    Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
    Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cgroup.h |    3 +--
 kernel/cgroup.c        |   43 ++++++++++++++++++++-----------------------
 kernel/cgroup_debug.c  |    4 ++--
 3 files changed, 23 insertions(+), 27 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 30934e4..7166023 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -9,7 +9,6 @@
  */
 
 #include <linux/sched.h>
-#include <linux/kref.h>
 #include <linux/cpumask.h>
 #include <linux/nodemask.h>
 #include <linux/rcupdate.h>
@@ -149,7 +148,7 @@ struct cgroup {
 struct css_set {
 
 	/* Reference count */
-	struct kref ref;
+	atomic_t refcount;
 
 	/*
 	 * List running through all cgroup groups in the same hash
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 8c6e1c1..1e49218 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -241,7 +241,6 @@ static void unlink_css_set(struct css_set *cg)
 	struct cg_cgroup_link *link;
 	struct cg_cgroup_link *saved_link;
 
-	write_lock(&css_set_lock);
 	hlist_del(&cg->hlist);
 	css_set_count--;
 
@@ -251,16 +250,25 @@ static void unlink_css_set(struct css_set *cg)
 		list_del(&link->cgrp_link_list);
 		kfree(link);
 	}
-
-	write_unlock(&css_set_lock);
 }
 
-static void __release_css_set(struct kref *k, int taskexit)
+static void __put_css_set(struct css_set *cg, int taskexit)
 {
 	int i;
-	struct css_set *cg = container_of(k, struct css_set, ref);
-
+	/*
+	 * Ensure that the refcount doesn't hit zero while any readers
+	 * can see it. Similar to atomic_dec_and_lock(), but for an
+	 * rwlock
+	 */
+	if (atomic_add_unless(&cg->refcount, -1, 1))
+		return;
+	write_lock(&css_set_lock);
+	if (!atomic_dec_and_test(&cg->refcount)) {
+		write_unlock(&css_set_lock);
+		return;
+	}
 	unlink_css_set(cg);
+	write_unlock(&css_set_lock);
 
 	rcu_read_lock();
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
@@ -276,32 +284,22 @@ static void __release_css_set(struct kref *k, int taskexit)
 	kfree(cg);
 }
 
-static void release_css_set(struct kref *k)
-{
-	__release_css_set(k, 0);
-}
-
-static void release_css_set_taskexit(struct kref *k)
-{
-	__release_css_set(k, 1);
-}
-
 /*
  * refcounted get/put for css_set objects
  */
 static inline void get_css_set(struct css_set *cg)
 {
-	kref_get(&cg->ref);
+	atomic_inc(&cg->refcount);
 }
 
 static inline void put_css_set(struct css_set *cg)
 {
-	kref_put(&cg->ref, release_css_set);
+	__put_css_set(cg, 0);
 }
 
 static inline void put_css_set_taskexit(struct css_set *cg)
 {
-	kref_put(&cg->ref, release_css_set_taskexit);
+	__put_css_set(cg, 1);
 }
 
 /*
@@ -427,7 +425,7 @@ static struct css_set *find_css_set(
 		return NULL;
 	}
 
-	kref_init(&res->ref);
+	atomic_set(&res->refcount, 1);
 	INIT_LIST_HEAD(&res->cg_links);
 	INIT_LIST_HEAD(&res->tasks);
 	INIT_HLIST_NODE(&res->hlist);
@@ -1728,7 +1726,7 @@ int cgroup_task_count(const struct cgroup *cgrp)
 
 	read_lock(&css_set_lock);
 	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
-		count += atomic_read(&link->cg->ref.refcount);
+		count += atomic_read(&link->cg->refcount);
 	}
 	read_unlock(&css_set_lock);
 	return count;
@@ -2495,8 +2493,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 int __init cgroup_init_early(void)
 {
 	int i;
-	kref_init(&init_css_set.ref);
-	kref_get(&init_css_set.ref);
+	atomic_set(&init_css_set.refcount, 1);
 	INIT_LIST_HEAD(&init_css_set.cg_links);
 	INIT_LIST_HEAD(&init_css_set.tasks);
 	INIT_HLIST_NODE(&init_css_set.hlist);
diff --git a/kernel/cgroup_debug.c b/kernel/cgroup_debug.c
index c3dc3ab..daca620 100644
--- a/kernel/cgroup_debug.c
+++ b/kernel/cgroup_debug.c
@@ -57,7 +57,7 @@ static u64 current_css_set_refcount_read(struct cgroup *cont,
 	u64 count;
 
 	rcu_read_lock();
-	count = atomic_read(&current->cgroups->ref.refcount);
+	count = atomic_read(&current->cgroups->refcount);
 	rcu_read_unlock();
 	return count;
 }
@@ -90,7 +90,7 @@ static struct cftype files[] =  {
 	{
 		.name = "releasable",
 		.read_u64 = releasable_read,
-	}
+	},
 };
 
 static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont)
--
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
cgroups: fix probable race with put_css_set[_taskexit] and ..., Linux Kernel Mailing ..., (Mon Oct 20, 10:03 am)