cgroups: add an owner to the mm_struct
Remove the mem_cgroup member from mm_struct and instead add an owner. This approach was suggested by Paul Menage. The advantage of this approach is that, once the mm->owner is known, using the subsystem id, the cgroup can be determined. It also allows several control groups that are virtually grouped by mm_struct, to exist independent of the memory controller i.e., without adding mem_cgroup's for each controller, to mm_struct. A new config option CONFIG_MM_OWNER is added and the memory resource controller selects this config option. This patch also adds cgroup callbacks to notify subsystems when mm->owner changes. The mm_owner_changed callback is called with the task_lock() of the new task held and is called just prior to changing the mm->owner. I am indebted to Paul Menage for the several reviews of this patchset and helping me make it lighter and simpler. This patch was tested on a powerpc box, it was compiled with both the MM_OWNER config turned on and off. After the thread group leader exits, it's moved to init_css_set by cgroup_exit(), thus all future charges from running threads would be redirected to the init_css_set's subsystem. Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com> Cc: Pavel Emelianov <xemul@openvz.org> Cc: Hugh Dickins <hugh@veritas.com> Cc: Sudhir Kumar <skumar@linux.vnet.ibm.com> Cc: YAMAMOTO Takashi <yamamoto@valinux.co.jp> Cc: Hirokazu Takahashi <taka@valinux.co.jp> Cc: David Rientjes <rientjes@google.com>, Cc: Balbir Singh <balbir@linux.vnet.ibm.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Acked-by: Pekka Enberg <penberg@cs.helsinki.fi> Reviewed-by: Paul Menage <menage@google.com> Cc: Oleg Nesterov <oleg@tv-sign.ru> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
committed by
Linus Torvalds
parent
29486df325
commit
cf475ad28a
@@ -119,6 +119,7 @@ static int root_count;
|
||||
* be called.
|
||||
*/
|
||||
static int need_forkexit_callback;
|
||||
static int need_mm_owner_callback __read_mostly;
|
||||
|
||||
/* convenient tests for these bits */
|
||||
inline int cgroup_is_removed(const struct cgroup *cgrp)
|
||||
@@ -2498,6 +2499,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
|
||||
init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id];
|
||||
|
||||
need_forkexit_callback |= ss->fork || ss->exit;
|
||||
need_mm_owner_callback |= !!ss->mm_owner_changed;
|
||||
|
||||
/* At system boot, before all subsystems have been
|
||||
* registered, no tasks have been forked, so we don't
|
||||
@@ -2748,6 +2750,34 @@ void cgroup_fork_callbacks(struct task_struct *child)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MM_OWNER
|
||||
/**
|
||||
* cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes
|
||||
* @p: the new owner
|
||||
*
|
||||
* Called on every change to mm->owner. mm_init_owner() does not
|
||||
* invoke this routine, since it assigns the mm->owner the first time
|
||||
* and does not change it.
|
||||
*/
|
||||
void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new)
|
||||
{
|
||||
struct cgroup *oldcgrp, *newcgrp;
|
||||
|
||||
if (need_mm_owner_callback) {
|
||||
int i;
|
||||
for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
|
||||
struct cgroup_subsys *ss = subsys[i];
|
||||
oldcgrp = task_cgroup(old, ss->subsys_id);
|
||||
newcgrp = task_cgroup(new, ss->subsys_id);
|
||||
if (oldcgrp == newcgrp)
|
||||
continue;
|
||||
if (ss->mm_owner_changed)
|
||||
ss->mm_owner_changed(ss, oldcgrp, newcgrp);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* CONFIG_MM_OWNER */
|
||||
|
||||
/**
|
||||
* cgroup_post_fork - called on a new task after adding it to the task list
|
||||
* @child: the task in question
|
||||
|
||||
@@ -557,6 +557,88 @@ void exit_fs(struct task_struct *tsk)
|
||||
|
||||
EXPORT_SYMBOL_GPL(exit_fs);
|
||||
|
||||
#ifdef CONFIG_MM_OWNER
|
||||
/*
|
||||
* Task p is exiting and it owned mm, lets find a new owner for it
|
||||
*/
|
||||
static inline int
|
||||
mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
|
||||
{
|
||||
/*
|
||||
* If there are other users of the mm and the owner (us) is exiting
|
||||
* we need to find a new owner to take on the responsibility.
|
||||
*/
|
||||
if (!mm)
|
||||
return 0;
|
||||
if (atomic_read(&mm->mm_users) <= 1)
|
||||
return 0;
|
||||
if (mm->owner != p)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
void mm_update_next_owner(struct mm_struct *mm)
|
||||
{
|
||||
struct task_struct *c, *g, *p = current;
|
||||
|
||||
retry:
|
||||
if (!mm_need_new_owner(mm, p))
|
||||
return;
|
||||
|
||||
read_lock(&tasklist_lock);
|
||||
/*
|
||||
* Search in the children
|
||||
*/
|
||||
list_for_each_entry(c, &p->children, sibling) {
|
||||
if (c->mm == mm)
|
||||
goto assign_new_owner;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search in the siblings
|
||||
*/
|
||||
list_for_each_entry(c, &p->parent->children, sibling) {
|
||||
if (c->mm == mm)
|
||||
goto assign_new_owner;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search through everything else. We should not get
|
||||
* here often
|
||||
*/
|
||||
do_each_thread(g, c) {
|
||||
if (c->mm == mm)
|
||||
goto assign_new_owner;
|
||||
} while_each_thread(g, c);
|
||||
|
||||
read_unlock(&tasklist_lock);
|
||||
return;
|
||||
|
||||
assign_new_owner:
|
||||
BUG_ON(c == p);
|
||||
get_task_struct(c);
|
||||
/*
|
||||
* The task_lock protects c->mm from changing.
|
||||
* We always want mm->owner->mm == mm
|
||||
*/
|
||||
task_lock(c);
|
||||
/*
|
||||
* Delay read_unlock() till we have the task_lock()
|
||||
* to ensure that c does not slip away underneath us
|
||||
*/
|
||||
read_unlock(&tasklist_lock);
|
||||
if (c->mm != mm) {
|
||||
task_unlock(c);
|
||||
put_task_struct(c);
|
||||
goto retry;
|
||||
}
|
||||
cgroup_mm_owner_callbacks(mm->owner, c);
|
||||
mm->owner = c;
|
||||
task_unlock(c);
|
||||
put_task_struct(c);
|
||||
}
|
||||
#endif /* CONFIG_MM_OWNER */
|
||||
|
||||
/*
|
||||
* Turn us into a lazy TLB process if we
|
||||
* aren't already..
|
||||
@@ -596,6 +678,7 @@ static void exit_mm(struct task_struct * tsk)
|
||||
/* We don't want this task to be frozen prematurely */
|
||||
clear_freeze_flag(tsk);
|
||||
task_unlock(tsk);
|
||||
mm_update_next_owner(mm);
|
||||
mmput(mm);
|
||||
}
|
||||
|
||||
|
||||
+8
-3
@@ -381,14 +381,13 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
|
||||
mm->ioctx_list = NULL;
|
||||
mm->free_area_cache = TASK_UNMAPPED_BASE;
|
||||
mm->cached_hole_size = ~0UL;
|
||||
mm_init_cgroup(mm, p);
|
||||
mm_init_owner(mm, p);
|
||||
|
||||
if (likely(!mm_alloc_pgd(mm))) {
|
||||
mm->def_flags = 0;
|
||||
return mm;
|
||||
}
|
||||
|
||||
mm_free_cgroup(mm);
|
||||
free_mm(mm);
|
||||
return NULL;
|
||||
}
|
||||
@@ -438,7 +437,6 @@ void mmput(struct mm_struct *mm)
|
||||
spin_unlock(&mmlist_lock);
|
||||
}
|
||||
put_swap_token(mm);
|
||||
mm_free_cgroup(mm);
|
||||
mmdrop(mm);
|
||||
}
|
||||
}
|
||||
@@ -982,6 +980,13 @@ static void rt_mutex_init_task(struct task_struct *p)
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MM_OWNER
|
||||
void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
|
||||
{
|
||||
mm->owner = p;
|
||||
}
|
||||
#endif /* CONFIG_MM_OWNER */
|
||||
|
||||
/*
|
||||
* This creates a new process as a copy of the old one,
|
||||
* but does not actually start it yet.
|
||||
|
||||
Reference in New Issue
Block a user