From d5b1fe68baa7213f198e5be8cd1a1037258ab2c8 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 28 Dec 2012 13:18:28 -0800
Subject: cgroup: remove unused dummy cgroup_fork_callbacks()

5edee61ede ("cgroup: cgroup_subsys->fork() should be called after the
task is added to css_set") removed cgroup_fork_callbacks() but forgot
to remove its dummy version for !CONFIG_CGROUPS.  Remove it.

Signed-off-by: Tejun Heo
Reported-by: Herton Ronaldo Krzesinski
---
 include/linux/cgroup.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 7d73905dcba2..942e68705577 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -706,7 +706,6 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id);
 static inline int cgroup_init_early(void) { return 0; }
 static inline int cgroup_init(void) { return 0; }
 static inline void cgroup_fork(struct task_struct *p) {}
-static inline void cgroup_fork_callbacks(struct task_struct *p) {}
 static inline void cgroup_post_fork(struct task_struct *p) {}
 static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
--
cgit v1.2.3


From 12a9d2fef1d35770d3cdc2cd1faabb83c45bc0fa Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 7 Jan 2013 08:49:33 -0800
Subject: cgroup: implement cgroup_rightmost_descendant()

Implement cgroup_rightmost_descendant() which returns the rightmost
descendant of the specified cgroup.  This can be used to skip the
cgroup's subtree while iterating with cgroup_for_each_descendant_pre().

Signed-off-by: Tejun Heo
Acked-by: Michal Hocko
Acked-by: Li Zefan
---
 include/linux/cgroup.h |  1 +
 kernel/cgroup.c        | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 942e68705577..8118a3120378 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -558,6 +558,7 @@ static inline struct cgroup* task_cgroup(struct task_struct *task,

 struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 					  struct cgroup *cgroup);
+struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos);

 /**
  * cgroup_for_each_descendant_pre - pre-order walk of a cgroup's descendants
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4855892798fd..6643f7053454 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3017,6 +3017,32 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos,
 }
 EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre);

+/**
+ * cgroup_rightmost_descendant - return the rightmost descendant of a cgroup
+ * @pos: cgroup of interest
+ *
+ * Return the rightmost descendant of @pos.  If there's no descendant,
+ * @pos is returned.  This can be used during pre-order traversal to skip
+ * subtree of @pos.
+ */
+struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos)
+{
+	struct cgroup *last, *tmp;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	do {
+		last = pos;
+		/* ->prev isn't RCU safe, walk ->next till the end */
+		pos = NULL;
+		list_for_each_entry_rcu(tmp, &last->children, sibling)
+			pos = tmp;
+	} while (pos);
+
+	return last;
+}
+EXPORT_SYMBOL_GPL(cgroup_rightmost_descendant);
+
 static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos)
 {
 	struct cgroup *last;
--
cgit v1.2.3
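
As the commit message notes, the new helper lets a pre-order walk skip an
entire subtree.  A minimal sketch of that usage, not part of the patch;
should_skip() and root are placeholders for whatever the caller cares about:

	struct cgroup *pos;

	rcu_read_lock();
	cgroup_for_each_descendant_pre(pos, root) {
		if (should_skip(pos)) {
			/*
			 * Jump to the last descendant of @pos; the next
			 * iteration then continues past the whole subtree.
			 */
			pos = cgroup_rightmost_descendant(pos);
			continue;
		}
		/* ... process @pos ... */
	}
	rcu_read_unlock();
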
From ace783b9bbfa2182b4a561498db3f09a0c56bc79 Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Thu, 24 Jan 2013 14:30:48 +0800
Subject: sched: split out css_online/css_offline from tg creation/destruction

This is a preparation for later patches.

- What do we gain from cpu_cgroup_css_online():

  After ss->css_alloc() and before ss->css_online(), there's a small
  window in which tg->css.cgroup is NULL.  With this change, tg won't
  be seen before ss->css_online(), where it's added to the global list,
  so we're guaranteed we'll never see a NULL tg->css.cgroup.

- What do we gain from cpu_cgroup_css_offline():

  tg is freed via RCU, and so is cgroup.  Without this change, this is
  how synchronization works:

  cgroup_rmdir()
    no ss->css_offline()
  diput()
    synchronize_rcu()
    ss->css_free()       <-- unregister tg, and free it via call_rcu()
  kfree_rcu(cgroup)      <-- wait for possible refs to cgroup, and free cgroup

  We can't just kfree(cgroup), because tg might access tg->css.cgroup.

  With this change:

  cgroup_rmdir()
    ss->css_offline()    <-- unregister tg
  diput()
    synchronize_rcu()    <-- wait for possible refs to tg and cgroup
    ss->css_free()       <-- free tg
  kfree_rcu(cgroup)      <-- free cgroup

  As you can see, kfree_rcu() is redundant now.

Signed-off-by: Li Zefan
Signed-off-by: Tejun Heo
Acked-by: Ingo Molnar
---
 include/linux/sched.h     |  3 +++
 kernel/sched/auto_group.c |  3 +++
 kernel/sched/core.c       | 49 +++++++++++++++++++++++++++++++++++++----------
 3 files changed, 45 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 206bb089c06b..577eb973de7a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2750,7 +2750,10 @@ extern void normalize_rt_tasks(void);
 extern struct task_group root_task_group;

 extern struct task_group *sched_create_group(struct task_group *parent);
+extern void sched_online_group(struct task_group *tg,
+			       struct task_group *parent);
 extern void sched_destroy_group(struct task_group *tg);
+extern void sched_offline_group(struct task_group *tg);
 extern void sched_move_task(struct task_struct *tsk);
 #ifdef CONFIG_FAIR_GROUP_SCHED
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index 0984a21076a3..64de5f8b0c9e 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -35,6 +35,7 @@ static inline void autogroup_destroy(struct kref *kref)
 	ag->tg->rt_se = NULL;
 	ag->tg->rt_rq = NULL;
 #endif
+	sched_offline_group(ag->tg);
 	sched_destroy_group(ag->tg);
 }

@@ -76,6 +77,8 @@ static inline struct autogroup *autogroup_create(void)
 	if (IS_ERR(tg))
 		goto out_free;

+	sched_online_group(tg, &root_task_group);
+
 	kref_init(&ag->kref);
 	init_rwsem(&ag->lock);
 	ag->id = atomic_inc_return(&autogroup_seq_nr);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 257002c13bb0..106167243d68 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7159,7 +7159,6 @@ static void free_sched_group(struct task_group *tg)
 struct task_group *sched_create_group(struct task_group *parent)
 {
 	struct task_group *tg;
-	unsigned long flags;

 	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
 	if (!tg)
@@ -7171,6 +7170,17 @@ struct task_group *sched_create_group(struct task_group *parent)
 	if (!alloc_rt_sched_group(tg, parent))
 		goto err;

+	return tg;
+
+err:
+	free_sched_group(tg);
+	return ERR_PTR(-ENOMEM);
+}
+
+void sched_online_group(struct task_group *tg, struct task_group *parent)
+{
+	unsigned long flags;
+
 	spin_lock_irqsave(&task_group_lock, flags);
 	list_add_rcu(&tg->list, &task_groups);

@@ -7180,12 +7190,6 @@ struct task_group *sched_create_group(struct task_group *parent)
 	INIT_LIST_HEAD(&tg->children);
 	list_add_rcu(&tg->siblings, &parent->children);
 	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	return tg;
-
-err:
-	free_sched_group(tg);
-	return ERR_PTR(-ENOMEM);
 }

 /* rcu callback to free various structures associated with a task group */
@@ -7197,6 +7201,12 @@ static void free_sched_group_rcu(struct rcu_head *rhp)

 /* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
+{
+	/* wait for possible concurrent references to cfs_rqs complete */
+	call_rcu(&tg->rcu, free_sched_group_rcu);
+}
+
+void sched_offline_group(struct task_group *tg)
 {
 	unsigned long flags;
 	int i;
@@ -7209,9 +7219,6 @@ void sched_destroy_group(struct task_group *tg)
 	list_del_rcu(&tg->list);
 	list_del_rcu(&tg->siblings);
 	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	/* wait for possible concurrent references to cfs_rqs complete */
-	call_rcu(&tg->rcu, free_sched_group_rcu);
 }

 /* change task's runqueue when it moves between groups.
@@ -7563,6 +7570,19 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
 	return &tg->css;
 }

+static int cpu_cgroup_css_online(struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+	struct task_group *parent;
+
+	if (!cgrp->parent)
+		return 0;
+
+	parent = cgroup_tg(cgrp->parent);
+	sched_online_group(tg, parent);
+	return 0;
+}
+
 static void cpu_cgroup_css_free(struct cgroup *cgrp)
 {
 	struct task_group *tg = cgroup_tg(cgrp);
@@ -7570,6 +7590,13 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp)
 	sched_destroy_group(tg);
 }

+static void cpu_cgroup_css_offline(struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+
+	sched_offline_group(tg);
+}
+
 static int cpu_cgroup_can_attach(struct cgroup *cgrp,
 				 struct cgroup_taskset *tset)
 {
@@ -7925,6 +7952,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.name		= "cpu",
 	.css_alloc	= cpu_cgroup_css_alloc,
 	.css_free	= cpu_cgroup_css_free,
+	.css_online	= cpu_cgroup_css_online,
+	.css_offline	= cpu_cgroup_css_offline,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
 	.exit		= cpu_cgroup_exit,
--
cgit v1.2.3
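
Taken together, the split turns task-group setup and teardown into two
explicit phases.  A rough sketch of the calling pattern, mirroring what the
autogroup hunks above do; error handling trimmed, not part of the patch:

	struct task_group *tg;

	tg = sched_create_group(parent);	/* allocate; not yet visible */
	if (IS_ERR(tg))
		return PTR_ERR(tg);
	sched_online_group(tg, parent);		/* link into the global lists */

	/* ... the group is usable here ... */

	sched_offline_group(tg);	/* unlink from the lists */
	sched_destroy_group(tg);	/* free via call_rcu() once readers are done */
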
From be44562613851235d801d41d5b3976dc4333f622 Mon Sep 17 00:00:00 2001
From: Li Zefan
Date: Thu, 24 Jan 2013 14:31:42 +0800
Subject: cgroup: remove synchronize_rcu() from cgroup_diput()

Free the cgroup via call_rcu().  The actual work is done through a
workqueue.

Signed-off-by: Li Zefan
Signed-off-by: Tejun Heo
---
 include/linux/cgroup.h |  1 +
 kernel/cgroup.c        | 72 ++++++++++++++++++++++++++++++--------------------
 2 files changed, 44 insertions(+), 29 deletions(-)

(limited to 'include')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 8118a3120378..900af5964f55 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -203,6 +203,7 @@ struct cgroup {

 	/* For RCU-protected deletion */
 	struct rcu_head rcu_head;
+	struct work_struct free_work;

 	/* List of events which userspace want to receive */
 	struct list_head event_list;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index af993919aa04..02e4f201472e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -852,12 +852,52 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb)
 	return inode;
 }

+static void cgroup_free_fn(struct work_struct *work)
+{
+	struct cgroup *cgrp = container_of(work, struct cgroup, free_work);
+	struct cgroup_subsys *ss;
+
+	mutex_lock(&cgroup_mutex);
+	/*
+	 * Release the subsystem state objects.
+	 */
+	for_each_subsys(cgrp->root, ss)
+		ss->css_free(cgrp);
+
+	cgrp->root->number_of_cgroups--;
+	mutex_unlock(&cgroup_mutex);
+
+	/*
+	 * Drop the active superblock reference that we took when we
+	 * created the cgroup
+	 */
+	deactivate_super(cgrp->root->sb);
+
+	/*
+	 * if we're getting rid of the cgroup, refcount should ensure
+	 * that there are no pidlists left.
+	 */
+	BUG_ON(!list_empty(&cgrp->pidlists));
+
+	simple_xattrs_free(&cgrp->xattrs);
+
+	ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
+	kfree(cgrp);
+}
+
+static void cgroup_free_rcu(struct rcu_head *head)
+{
+	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
+
+	schedule_work(&cgrp->free_work);
+}
+
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 {
 	/* is dentry a directory ? if so, kfree() associated cgroup */
 	if (S_ISDIR(inode->i_mode)) {
 		struct cgroup *cgrp = dentry->d_fsdata;
-		struct cgroup_subsys *ss;
+
 		BUG_ON(!(cgroup_is_removed(cgrp)));
 		/* It's possible for external users to be holding css
 		 * reference counts on a cgroup; css_put() needs to
@@ -865,34 +905,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
 		 * the reference count in order to know if it needs to
 		 * queue the cgroup to be handled by the release
 		 * agent */
-		synchronize_rcu();
-
-		mutex_lock(&cgroup_mutex);
-		/*
-		 * Release the subsystem state objects.
-		 */
-		for_each_subsys(cgrp->root, ss)
-			ss->css_free(cgrp);
-
-		cgrp->root->number_of_cgroups--;
-		mutex_unlock(&cgroup_mutex);
-
-		/*
-		 * Drop the active superblock reference that we took when we
-		 * created the cgroup
-		 */
-		deactivate_super(cgrp->root->sb);
-
-		/*
-		 * if we're getting rid of the cgroup, refcount should ensure
-		 * that there are no pidlists left.
-		 */
-		BUG_ON(!list_empty(&cgrp->pidlists));
-
-		simple_xattrs_free(&cgrp->xattrs);
-
-		ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id);
-		kfree(cgrp);
+		call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
 	} else {
 		struct cfent *cfe = __d_cfe(dentry);
 		struct cgroup *cgrp = dentry->d_parent->d_fsdata;
@@ -1391,6 +1404,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->allcg_node);
 	INIT_LIST_HEAD(&cgrp->release_list);
 	INIT_LIST_HEAD(&cgrp->pidlists);
+	INIT_WORK(&cgrp->free_work, cgroup_free_fn);
 	mutex_init(&cgrp->pidlist_mutex);
 	INIT_LIST_HEAD(&cgrp->event_list);
 	spin_lock_init(&cgrp->event_list_lock);
--
cgit v1.2.3
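
The deferral pattern this patch applies, shown in isolation: call_rcu() with a
callback that only queues a work item, while the sleepable teardown runs from
the workqueue.  A self-contained sketch; struct foo and the foo_* names are
invented for illustration and are not part of the patch:

	#include <linux/kernel.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/workqueue.h>

	struct foo {
		struct rcu_head rcu_head;
		struct work_struct free_work;
		/* ... payload ... */
	};

	static void foo_free_fn(struct work_struct *work)
	{
		struct foo *f = container_of(work, struct foo, free_work);

		/* Process context: may take mutexes, drop refs, etc. */
		kfree(f);
	}

	static void foo_free_rcu(struct rcu_head *head)
	{
		struct foo *f = container_of(head, struct foo, rcu_head);

		/* RCU callbacks run in softirq context and must not sleep,
		 * so punt the real work to a workqueue. */
		schedule_work(&f->free_work);
	}

	static void foo_release(struct foo *f)
	{
		/* INIT_WORK() would normally be done when @f is set up. */
		INIT_WORK(&f->free_work, foo_free_fn);
		call_rcu(&f->rcu_head, foo_free_rcu);
	}
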