Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip into trace/tip/tracing/core-7
Conflicts: include/linux/ftrace_event.h include/trace/ftrace.h kernel/trace/trace_event_perf.c kernel/trace/trace_kprobe.c kernel/trace/trace_syscalls.c Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
This commit is contained in:
+9
-8
@@ -353,17 +353,18 @@ restart:
|
||||
|
||||
void acct_exit_ns(struct pid_namespace *ns)
|
||||
{
|
||||
struct bsd_acct_struct *acct;
|
||||
struct bsd_acct_struct *acct = ns->bacct;
|
||||
|
||||
if (acct == NULL)
|
||||
return;
|
||||
|
||||
del_timer_sync(&acct->timer);
|
||||
spin_lock(&acct_lock);
|
||||
acct = ns->bacct;
|
||||
if (acct != NULL) {
|
||||
if (acct->file != NULL)
|
||||
acct_file_reopen(acct, NULL, NULL);
|
||||
|
||||
kfree(acct);
|
||||
}
|
||||
if (acct->file != NULL)
|
||||
acct_file_reopen(acct, NULL, NULL);
|
||||
spin_unlock(&acct_lock);
|
||||
|
||||
kfree(acct);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
+50
-12
@@ -1646,7 +1646,9 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
|
||||
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
|
||||
{
|
||||
char *start;
|
||||
struct dentry *dentry = rcu_dereference(cgrp->dentry);
|
||||
struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
|
||||
rcu_read_lock_held() ||
|
||||
cgroup_lock_is_held());
|
||||
|
||||
if (!dentry || cgrp == dummytop) {
|
||||
/*
|
||||
@@ -1662,13 +1664,17 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
|
||||
*--start = '\0';
|
||||
for (;;) {
|
||||
int len = dentry->d_name.len;
|
||||
|
||||
if ((start -= len) < buf)
|
||||
return -ENAMETOOLONG;
|
||||
memcpy(start, cgrp->dentry->d_name.name, len);
|
||||
memcpy(start, dentry->d_name.name, len);
|
||||
cgrp = cgrp->parent;
|
||||
if (!cgrp)
|
||||
break;
|
||||
dentry = rcu_dereference(cgrp->dentry);
|
||||
|
||||
dentry = rcu_dereference_check(cgrp->dentry,
|
||||
rcu_read_lock_held() ||
|
||||
cgroup_lock_is_held());
|
||||
if (!cgrp->parent)
|
||||
continue;
|
||||
if (--start < buf)
|
||||
@@ -4429,7 +4435,15 @@ __setup("cgroup_disable=", cgroup_disable);
|
||||
*/
|
||||
unsigned short css_id(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct css_id *cssid = rcu_dereference(css->id);
|
||||
struct css_id *cssid;
|
||||
|
||||
/*
|
||||
* This css_id() can return correct value when someone has refcnt
|
||||
* on this or this is under rcu_read_lock(). Once css->id is allocated,
|
||||
* it's unchanged until freed.
|
||||
*/
|
||||
cssid = rcu_dereference_check(css->id,
|
||||
rcu_read_lock_held() || atomic_read(&css->refcnt));
|
||||
|
||||
if (cssid)
|
||||
return cssid->id;
|
||||
@@ -4439,7 +4453,10 @@ EXPORT_SYMBOL_GPL(css_id);
|
||||
|
||||
unsigned short css_depth(struct cgroup_subsys_state *css)
|
||||
{
|
||||
struct css_id *cssid = rcu_dereference(css->id);
|
||||
struct css_id *cssid;
|
||||
|
||||
cssid = rcu_dereference_check(css->id,
|
||||
rcu_read_lock_held() || atomic_read(&css->refcnt));
|
||||
|
||||
if (cssid)
|
||||
return cssid->depth;
|
||||
@@ -4447,15 +4464,36 @@ unsigned short css_depth(struct cgroup_subsys_state *css)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(css_depth);
|
||||
|
||||
/**
|
||||
* css_is_ancestor - test "root" css is an ancestor of "child"
|
||||
* @child: the css to be tested.
|
||||
* @root: the css supposed to be an ancestor of the child.
|
||||
*
|
||||
* Returns true if "root" is an ancestor of "child" in its hierarchy. Because
|
||||
* this function reads css->id, this uses rcu_dereference() and rcu_read_lock().
|
||||
* But, considering usual usage, the csses should be valid objects after test.
|
||||
* Assuming that the caller will do some action to the child if this returns
|
||||
* true, the caller must take "child"'s reference count.
|
||||
* If "child" is valid object and this returns true, "root" is valid, too.
|
||||
*/
|
||||
|
||||
bool css_is_ancestor(struct cgroup_subsys_state *child,
|
||||
const struct cgroup_subsys_state *root)
|
||||
{
|
||||
struct css_id *child_id = rcu_dereference(child->id);
|
||||
struct css_id *root_id = rcu_dereference(root->id);
|
||||
struct css_id *child_id;
|
||||
struct css_id *root_id;
|
||||
bool ret = true;
|
||||
|
||||
if (!child_id || !root_id || (child_id->depth < root_id->depth))
|
||||
return false;
|
||||
return child_id->stack[root_id->depth] == root_id->id;
|
||||
rcu_read_lock();
|
||||
child_id = rcu_dereference(child->id);
|
||||
root_id = rcu_dereference(root->id);
|
||||
if (!child_id
|
||||
|| !root_id
|
||||
|| (child_id->depth < root_id->depth)
|
||||
|| (child_id->stack[root_id->depth] != root_id->id))
|
||||
ret = false;
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __free_css_id_cb(struct rcu_head *head)
|
||||
@@ -4555,13 +4593,13 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
|
||||
{
|
||||
int subsys_id, i, depth = 0;
|
||||
struct cgroup_subsys_state *parent_css, *child_css;
|
||||
struct css_id *child_id, *parent_id = NULL;
|
||||
struct css_id *child_id, *parent_id;
|
||||
|
||||
subsys_id = ss->subsys_id;
|
||||
parent_css = parent->subsys[subsys_id];
|
||||
child_css = child->subsys[subsys_id];
|
||||
depth = css_depth(parent_css) + 1;
|
||||
parent_id = parent_css->id;
|
||||
depth = parent_id->depth;
|
||||
|
||||
child_id = get_new_cssid(ss, depth);
|
||||
if (IS_ERR(child_id))
|
||||
|
||||
@@ -1111,7 +1111,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
|
||||
p->memcg_batch.do_batch = 0;
|
||||
p->memcg_batch.memcg = NULL;
|
||||
#endif
|
||||
p->stack_start = stack_start;
|
||||
|
||||
/* Perform scheduler related setup. Assign this task to a CPU. */
|
||||
sched_fork(p, clone_flags);
|
||||
|
||||
+2
-4
@@ -1134,11 +1134,9 @@ int crash_shrink_memory(unsigned long new_size)
|
||||
|
||||
free_reserved_phys_range(end, crashk_res.end);
|
||||
|
||||
if (start == end) {
|
||||
crashk_res.end = end;
|
||||
if (start == end)
|
||||
release_resource(&crashk_res);
|
||||
} else
|
||||
crashk_res.end = end - 1;
|
||||
crashk_res.end = end - 1;
|
||||
|
||||
unlock:
|
||||
mutex_unlock(&kexec_mutex);
|
||||
|
||||
+46
-38
@@ -431,20 +431,7 @@ static struct stack_trace lockdep_init_trace = {
|
||||
/*
|
||||
* Various lockdep statistics:
|
||||
*/
|
||||
atomic_t chain_lookup_hits;
|
||||
atomic_t chain_lookup_misses;
|
||||
atomic_t hardirqs_on_events;
|
||||
atomic_t hardirqs_off_events;
|
||||
atomic_t redundant_hardirqs_on;
|
||||
atomic_t redundant_hardirqs_off;
|
||||
atomic_t softirqs_on_events;
|
||||
atomic_t softirqs_off_events;
|
||||
atomic_t redundant_softirqs_on;
|
||||
atomic_t redundant_softirqs_off;
|
||||
atomic_t nr_unused_locks;
|
||||
atomic_t nr_cyclic_checks;
|
||||
atomic_t nr_find_usage_forwards_checks;
|
||||
atomic_t nr_find_usage_backwards_checks;
|
||||
DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
|
||||
#endif
|
||||
|
||||
/*
|
||||
@@ -748,7 +735,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
|
||||
return NULL;
|
||||
}
|
||||
class = lock_classes + nr_lock_classes++;
|
||||
debug_atomic_inc(&nr_unused_locks);
|
||||
debug_atomic_inc(nr_unused_locks);
|
||||
class->key = key;
|
||||
class->name = lock->name;
|
||||
class->subclass = subclass;
|
||||
@@ -818,7 +805,8 @@ static struct lock_list *alloc_list_entry(void)
|
||||
* Add a new dependency to the head of the list:
|
||||
*/
|
||||
static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
|
||||
struct list_head *head, unsigned long ip, int distance)
|
||||
struct list_head *head, unsigned long ip,
|
||||
int distance, struct stack_trace *trace)
|
||||
{
|
||||
struct lock_list *entry;
|
||||
/*
|
||||
@@ -829,11 +817,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
|
||||
if (!entry)
|
||||
return 0;
|
||||
|
||||
if (!save_trace(&entry->trace))
|
||||
return 0;
|
||||
|
||||
entry->class = this;
|
||||
entry->distance = distance;
|
||||
entry->trace = *trace;
|
||||
/*
|
||||
* Since we never remove from the dependency list, the list can
|
||||
* be walked lockless by other CPUs, it's only allocation
|
||||
@@ -1205,7 +1191,7 @@ check_noncircular(struct lock_list *root, struct lock_class *target,
|
||||
{
|
||||
int result;
|
||||
|
||||
debug_atomic_inc(&nr_cyclic_checks);
|
||||
debug_atomic_inc(nr_cyclic_checks);
|
||||
|
||||
result = __bfs_forwards(root, target, class_equal, target_entry);
|
||||
|
||||
@@ -1242,7 +1228,7 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
|
||||
{
|
||||
int result;
|
||||
|
||||
debug_atomic_inc(&nr_find_usage_forwards_checks);
|
||||
debug_atomic_inc(nr_find_usage_forwards_checks);
|
||||
|
||||
result = __bfs_forwards(root, (void *)bit, usage_match, target_entry);
|
||||
|
||||
@@ -1265,7 +1251,7 @@ find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit,
|
||||
{
|
||||
int result;
|
||||
|
||||
debug_atomic_inc(&nr_find_usage_backwards_checks);
|
||||
debug_atomic_inc(nr_find_usage_backwards_checks);
|
||||
|
||||
result = __bfs_backwards(root, (void *)bit, usage_match, target_entry);
|
||||
|
||||
@@ -1635,12 +1621,20 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
|
||||
*/
|
||||
static int
|
||||
check_prev_add(struct task_struct *curr, struct held_lock *prev,
|
||||
struct held_lock *next, int distance)
|
||||
struct held_lock *next, int distance, int trylock_loop)
|
||||
{
|
||||
struct lock_list *entry;
|
||||
int ret;
|
||||
struct lock_list this;
|
||||
struct lock_list *uninitialized_var(target_entry);
|
||||
/*
|
||||
* Static variable, serialized by the graph_lock().
|
||||
*
|
||||
* We use this static variable to save the stack trace in case
|
||||
* we call into this function multiple times due to encountering
|
||||
* trylocks in the held lock stack.
|
||||
*/
|
||||
static struct stack_trace trace;
|
||||
|
||||
/*
|
||||
* Prove that the new <prev> -> <next> dependency would not
|
||||
@@ -1688,20 +1682,23 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
|
||||
}
|
||||
}
|
||||
|
||||
if (!trylock_loop && !save_trace(&trace))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Ok, all validations passed, add the new lock
|
||||
* to the previous lock's dependency list:
|
||||
*/
|
||||
ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
|
||||
&hlock_class(prev)->locks_after,
|
||||
next->acquire_ip, distance);
|
||||
next->acquire_ip, distance, &trace);
|
||||
|
||||
if (!ret)
|
||||
return 0;
|
||||
|
||||
ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
|
||||
&hlock_class(next)->locks_before,
|
||||
next->acquire_ip, distance);
|
||||
next->acquire_ip, distance, &trace);
|
||||
if (!ret)
|
||||
return 0;
|
||||
|
||||
@@ -1731,6 +1728,7 @@ static int
|
||||
check_prevs_add(struct task_struct *curr, struct held_lock *next)
|
||||
{
|
||||
int depth = curr->lockdep_depth;
|
||||
int trylock_loop = 0;
|
||||
struct held_lock *hlock;
|
||||
|
||||
/*
|
||||
@@ -1756,7 +1754,8 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
|
||||
* added:
|
||||
*/
|
||||
if (hlock->read != 2) {
|
||||
if (!check_prev_add(curr, hlock, next, distance))
|
||||
if (!check_prev_add(curr, hlock, next,
|
||||
distance, trylock_loop))
|
||||
return 0;
|
||||
/*
|
||||
* Stop after the first non-trylock entry,
|
||||
@@ -1779,6 +1778,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
|
||||
if (curr->held_locks[depth].irq_context !=
|
||||
curr->held_locks[depth-1].irq_context)
|
||||
break;
|
||||
trylock_loop = 1;
|
||||
}
|
||||
return 1;
|
||||
out_bug:
|
||||
@@ -1825,7 +1825,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
|
||||
list_for_each_entry(chain, hash_head, entry) {
|
||||
if (chain->chain_key == chain_key) {
|
||||
cache_hit:
|
||||
debug_atomic_inc(&chain_lookup_hits);
|
||||
debug_atomic_inc(chain_lookup_hits);
|
||||
if (very_verbose(class))
|
||||
printk("\nhash chain already cached, key: "
|
||||
"%016Lx tail class: [%p] %s\n",
|
||||
@@ -1890,7 +1890,7 @@ cache_hit:
|
||||
chain_hlocks[chain->base + j] = class - lock_classes;
|
||||
}
|
||||
list_add_tail_rcu(&chain->entry, hash_head);
|
||||
debug_atomic_inc(&chain_lookup_misses);
|
||||
debug_atomic_inc(chain_lookup_misses);
|
||||
inc_chains();
|
||||
|
||||
return 1;
|
||||
@@ -2311,7 +2311,12 @@ void trace_hardirqs_on_caller(unsigned long ip)
|
||||
return;
|
||||
|
||||
if (unlikely(curr->hardirqs_enabled)) {
|
||||
debug_atomic_inc(&redundant_hardirqs_on);
|
||||
/*
|
||||
* Neither irq nor preemption are disabled here
|
||||
* so this is racy by nature but losing one hit
|
||||
* in a stat is not a big deal.
|
||||
*/
|
||||
__debug_atomic_inc(redundant_hardirqs_on);
|
||||
return;
|
||||
}
|
||||
/* we'll do an OFF -> ON transition: */
|
||||
@@ -2338,7 +2343,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
|
||||
|
||||
curr->hardirq_enable_ip = ip;
|
||||
curr->hardirq_enable_event = ++curr->irq_events;
|
||||
debug_atomic_inc(&hardirqs_on_events);
|
||||
debug_atomic_inc(hardirqs_on_events);
|
||||
}
|
||||
EXPORT_SYMBOL(trace_hardirqs_on_caller);
|
||||
|
||||
@@ -2370,9 +2375,9 @@ void trace_hardirqs_off_caller(unsigned long ip)
|
||||
curr->hardirqs_enabled = 0;
|
||||
curr->hardirq_disable_ip = ip;
|
||||
curr->hardirq_disable_event = ++curr->irq_events;
|
||||
debug_atomic_inc(&hardirqs_off_events);
|
||||
debug_atomic_inc(hardirqs_off_events);
|
||||
} else
|
||||
debug_atomic_inc(&redundant_hardirqs_off);
|
||||
debug_atomic_inc(redundant_hardirqs_off);
|
||||
}
|
||||
EXPORT_SYMBOL(trace_hardirqs_off_caller);
|
||||
|
||||
@@ -2396,7 +2401,7 @@ void trace_softirqs_on(unsigned long ip)
|
||||
return;
|
||||
|
||||
if (curr->softirqs_enabled) {
|
||||
debug_atomic_inc(&redundant_softirqs_on);
|
||||
debug_atomic_inc(redundant_softirqs_on);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2406,7 +2411,7 @@ void trace_softirqs_on(unsigned long ip)
|
||||
curr->softirqs_enabled = 1;
|
||||
curr->softirq_enable_ip = ip;
|
||||
curr->softirq_enable_event = ++curr->irq_events;
|
||||
debug_atomic_inc(&softirqs_on_events);
|
||||
debug_atomic_inc(softirqs_on_events);
|
||||
/*
|
||||
* We are going to turn softirqs on, so set the
|
||||
* usage bit for all held locks, if hardirqs are
|
||||
@@ -2436,10 +2441,10 @@ void trace_softirqs_off(unsigned long ip)
|
||||
curr->softirqs_enabled = 0;
|
||||
curr->softirq_disable_ip = ip;
|
||||
curr->softirq_disable_event = ++curr->irq_events;
|
||||
debug_atomic_inc(&softirqs_off_events);
|
||||
debug_atomic_inc(softirqs_off_events);
|
||||
DEBUG_LOCKS_WARN_ON(!softirq_count());
|
||||
} else
|
||||
debug_atomic_inc(&redundant_softirqs_off);
|
||||
debug_atomic_inc(redundant_softirqs_off);
|
||||
}
|
||||
|
||||
static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
|
||||
@@ -2644,7 +2649,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
|
||||
return 0;
|
||||
break;
|
||||
case LOCK_USED:
|
||||
debug_atomic_dec(&nr_unused_locks);
|
||||
debug_atomic_dec(nr_unused_locks);
|
||||
break;
|
||||
default:
|
||||
if (!debug_locks_off_graph_unlock())
|
||||
@@ -2750,7 +2755,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
|
||||
if (!class)
|
||||
return 0;
|
||||
}
|
||||
debug_atomic_inc((atomic_t *)&class->ops);
|
||||
atomic_inc((atomic_t *)&class->ops);
|
||||
if (very_verbose(class)) {
|
||||
printk("\nacquire class [%p] %s", class->key, class->name);
|
||||
if (class->name_version > 1)
|
||||
@@ -3801,8 +3806,11 @@ void lockdep_rcu_dereference(const char *file, const int line)
|
||||
{
|
||||
struct task_struct *curr = current;
|
||||
|
||||
#ifndef CONFIG_PROVE_RCU_REPEATEDLY
|
||||
if (!debug_locks_off())
|
||||
return;
|
||||
#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
|
||||
/* Note: the following can be executed concurrently, so be careful. */
|
||||
printk("\n===================================================\n");
|
||||
printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n");
|
||||
printk( "---------------------------------------------------\n");
|
||||
|
||||
+51
-21
@@ -110,30 +110,60 @@ lockdep_count_backward_deps(struct lock_class *class)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCKDEP
|
||||
|
||||
#include <asm/local.h>
|
||||
/*
|
||||
* Various lockdep statistics:
|
||||
* Various lockdep statistics.
|
||||
* We want them per cpu as they are often accessed in fast path
|
||||
* and we want to avoid too much cache bouncing.
|
||||
*/
|
||||
extern atomic_t chain_lookup_hits;
|
||||
extern atomic_t chain_lookup_misses;
|
||||
extern atomic_t hardirqs_on_events;
|
||||
extern atomic_t hardirqs_off_events;
|
||||
extern atomic_t redundant_hardirqs_on;
|
||||
extern atomic_t redundant_hardirqs_off;
|
||||
extern atomic_t softirqs_on_events;
|
||||
extern atomic_t softirqs_off_events;
|
||||
extern atomic_t redundant_softirqs_on;
|
||||
extern atomic_t redundant_softirqs_off;
|
||||
extern atomic_t nr_unused_locks;
|
||||
extern atomic_t nr_cyclic_checks;
|
||||
extern atomic_t nr_cyclic_check_recursions;
|
||||
extern atomic_t nr_find_usage_forwards_checks;
|
||||
extern atomic_t nr_find_usage_forwards_recursions;
|
||||
extern atomic_t nr_find_usage_backwards_checks;
|
||||
extern atomic_t nr_find_usage_backwards_recursions;
|
||||
# define debug_atomic_inc(ptr) atomic_inc(ptr)
|
||||
# define debug_atomic_dec(ptr) atomic_dec(ptr)
|
||||
# define debug_atomic_read(ptr) atomic_read(ptr)
|
||||
struct lockdep_stats {
|
||||
int chain_lookup_hits;
|
||||
int chain_lookup_misses;
|
||||
int hardirqs_on_events;
|
||||
int hardirqs_off_events;
|
||||
int redundant_hardirqs_on;
|
||||
int redundant_hardirqs_off;
|
||||
int softirqs_on_events;
|
||||
int softirqs_off_events;
|
||||
int redundant_softirqs_on;
|
||||
int redundant_softirqs_off;
|
||||
int nr_unused_locks;
|
||||
int nr_cyclic_checks;
|
||||
int nr_cyclic_check_recursions;
|
||||
int nr_find_usage_forwards_checks;
|
||||
int nr_find_usage_forwards_recursions;
|
||||
int nr_find_usage_backwards_checks;
|
||||
int nr_find_usage_backwards_recursions;
|
||||
};
|
||||
|
||||
DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
|
||||
|
||||
#define __debug_atomic_inc(ptr) \
|
||||
this_cpu_inc(lockdep_stats.ptr);
|
||||
|
||||
#define debug_atomic_inc(ptr) { \
|
||||
WARN_ON_ONCE(!irqs_disabled()); \
|
||||
__this_cpu_inc(lockdep_stats.ptr); \
|
||||
}
|
||||
|
||||
#define debug_atomic_dec(ptr) { \
|
||||
WARN_ON_ONCE(!irqs_disabled()); \
|
||||
__this_cpu_dec(lockdep_stats.ptr); \
|
||||
}
|
||||
|
||||
#define debug_atomic_read(ptr) ({ \
|
||||
struct lockdep_stats *__cpu_lockdep_stats; \
|
||||
unsigned long long __total = 0; \
|
||||
int __cpu; \
|
||||
for_each_possible_cpu(__cpu) { \
|
||||
__cpu_lockdep_stats = &per_cpu(lockdep_stats, __cpu); \
|
||||
__total += __cpu_lockdep_stats->ptr; \
|
||||
} \
|
||||
__total; \
|
||||
})
|
||||
#else
|
||||
# define __debug_atomic_inc(ptr) do { } while (0)
|
||||
# define debug_atomic_inc(ptr) do { } while (0)
|
||||
# define debug_atomic_dec(ptr) do { } while (0)
|
||||
# define debug_atomic_read(ptr) 0
|
||||
|
||||
+27
-27
@@ -184,34 +184,34 @@ static const struct file_operations proc_lockdep_chains_operations = {
|
||||
static void lockdep_stats_debug_show(struct seq_file *m)
|
||||
{
|
||||
#ifdef CONFIG_DEBUG_LOCKDEP
|
||||
unsigned int hi1 = debug_atomic_read(&hardirqs_on_events),
|
||||
hi2 = debug_atomic_read(&hardirqs_off_events),
|
||||
hr1 = debug_atomic_read(&redundant_hardirqs_on),
|
||||
hr2 = debug_atomic_read(&redundant_hardirqs_off),
|
||||
si1 = debug_atomic_read(&softirqs_on_events),
|
||||
si2 = debug_atomic_read(&softirqs_off_events),
|
||||
sr1 = debug_atomic_read(&redundant_softirqs_on),
|
||||
sr2 = debug_atomic_read(&redundant_softirqs_off);
|
||||
unsigned long long hi1 = debug_atomic_read(hardirqs_on_events),
|
||||
hi2 = debug_atomic_read(hardirqs_off_events),
|
||||
hr1 = debug_atomic_read(redundant_hardirqs_on),
|
||||
hr2 = debug_atomic_read(redundant_hardirqs_off),
|
||||
si1 = debug_atomic_read(softirqs_on_events),
|
||||
si2 = debug_atomic_read(softirqs_off_events),
|
||||
sr1 = debug_atomic_read(redundant_softirqs_on),
|
||||
sr2 = debug_atomic_read(redundant_softirqs_off);
|
||||
|
||||
seq_printf(m, " chain lookup misses: %11u\n",
|
||||
debug_atomic_read(&chain_lookup_misses));
|
||||
seq_printf(m, " chain lookup hits: %11u\n",
|
||||
debug_atomic_read(&chain_lookup_hits));
|
||||
seq_printf(m, " cyclic checks: %11u\n",
|
||||
debug_atomic_read(&nr_cyclic_checks));
|
||||
seq_printf(m, " find-mask forwards checks: %11u\n",
|
||||
debug_atomic_read(&nr_find_usage_forwards_checks));
|
||||
seq_printf(m, " find-mask backwards checks: %11u\n",
|
||||
debug_atomic_read(&nr_find_usage_backwards_checks));
|
||||
seq_printf(m, " chain lookup misses: %11llu\n",
|
||||
debug_atomic_read(chain_lookup_misses));
|
||||
seq_printf(m, " chain lookup hits: %11llu\n",
|
||||
debug_atomic_read(chain_lookup_hits));
|
||||
seq_printf(m, " cyclic checks: %11llu\n",
|
||||
debug_atomic_read(nr_cyclic_checks));
|
||||
seq_printf(m, " find-mask forwards checks: %11llu\n",
|
||||
debug_atomic_read(nr_find_usage_forwards_checks));
|
||||
seq_printf(m, " find-mask backwards checks: %11llu\n",
|
||||
debug_atomic_read(nr_find_usage_backwards_checks));
|
||||
|
||||
seq_printf(m, " hardirq on events: %11u\n", hi1);
|
||||
seq_printf(m, " hardirq off events: %11u\n", hi2);
|
||||
seq_printf(m, " redundant hardirq ons: %11u\n", hr1);
|
||||
seq_printf(m, " redundant hardirq offs: %11u\n", hr2);
|
||||
seq_printf(m, " softirq on events: %11u\n", si1);
|
||||
seq_printf(m, " softirq off events: %11u\n", si2);
|
||||
seq_printf(m, " redundant softirq ons: %11u\n", sr1);
|
||||
seq_printf(m, " redundant softirq offs: %11u\n", sr2);
|
||||
seq_printf(m, " hardirq on events: %11llu\n", hi1);
|
||||
seq_printf(m, " hardirq off events: %11llu\n", hi2);
|
||||
seq_printf(m, " redundant hardirq ons: %11llu\n", hr1);
|
||||
seq_printf(m, " redundant hardirq offs: %11llu\n", hr2);
|
||||
seq_printf(m, " softirq on events: %11llu\n", si1);
|
||||
seq_printf(m, " softirq off events: %11llu\n", si2);
|
||||
seq_printf(m, " redundant softirq ons: %11llu\n", sr1);
|
||||
seq_printf(m, " redundant softirq offs: %11llu\n", sr2);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -263,7 +263,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
|
||||
#endif
|
||||
}
|
||||
#ifdef CONFIG_DEBUG_LOCKDEP
|
||||
DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
|
||||
DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused);
|
||||
#endif
|
||||
seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
|
||||
nr_lock_classes, MAX_LOCKDEP_KEYS);
|
||||
|
||||
+220
-175
@@ -2297,11 +2297,6 @@ unlock:
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static unsigned long perf_data_size(struct perf_mmap_data *data)
|
||||
{
|
||||
return data->nr_pages << (PAGE_SHIFT + data->data_order);
|
||||
}
|
||||
|
||||
#ifndef CONFIG_PERF_USE_VMALLOC
|
||||
|
||||
/*
|
||||
@@ -2320,6 +2315,19 @@ perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
|
||||
return virt_to_page(data->data_pages[pgoff - 1]);
|
||||
}
|
||||
|
||||
static void *perf_mmap_alloc_page(int cpu)
|
||||
{
|
||||
struct page *page;
|
||||
int node;
|
||||
|
||||
node = (cpu == -1) ? cpu : cpu_to_node(cpu);
|
||||
page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
|
||||
if (!page)
|
||||
return NULL;
|
||||
|
||||
return page_address(page);
|
||||
}
|
||||
|
||||
static struct perf_mmap_data *
|
||||
perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
|
||||
{
|
||||
@@ -2336,17 +2344,16 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
|
||||
if (!data)
|
||||
goto fail;
|
||||
|
||||
data->user_page = (void *)get_zeroed_page(GFP_KERNEL);
|
||||
data->user_page = perf_mmap_alloc_page(event->cpu);
|
||||
if (!data->user_page)
|
||||
goto fail_user_page;
|
||||
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
|
||||
data->data_pages[i] = perf_mmap_alloc_page(event->cpu);
|
||||
if (!data->data_pages[i])
|
||||
goto fail_data_pages;
|
||||
}
|
||||
|
||||
data->data_order = 0;
|
||||
data->nr_pages = nr_pages;
|
||||
|
||||
return data;
|
||||
@@ -2382,6 +2389,11 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
|
||||
kfree(data);
|
||||
}
|
||||
|
||||
static inline int page_order(struct perf_mmap_data *data)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
@@ -2390,10 +2402,15 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
|
||||
* Required for architectures that have d-cache aliasing issues.
|
||||
*/
|
||||
|
||||
static inline int page_order(struct perf_mmap_data *data)
|
||||
{
|
||||
return data->page_order;
|
||||
}
|
||||
|
||||
static struct page *
|
||||
perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
|
||||
{
|
||||
if (pgoff > (1UL << data->data_order))
|
||||
if (pgoff > (1UL << page_order(data)))
|
||||
return NULL;
|
||||
|
||||
return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
|
||||
@@ -2413,7 +2430,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
|
||||
int i, nr;
|
||||
|
||||
data = container_of(work, struct perf_mmap_data, work);
|
||||
nr = 1 << data->data_order;
|
||||
nr = 1 << page_order(data);
|
||||
|
||||
base = data->user_page;
|
||||
for (i = 0; i < nr + 1; i++)
|
||||
@@ -2452,7 +2469,7 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
|
||||
|
||||
data->user_page = all_buf;
|
||||
data->data_pages[0] = all_buf + PAGE_SIZE;
|
||||
data->data_order = ilog2(nr_pages);
|
||||
data->page_order = ilog2(nr_pages);
|
||||
data->nr_pages = 1;
|
||||
|
||||
return data;
|
||||
@@ -2466,6 +2483,11 @@ fail:
|
||||
|
||||
#endif
|
||||
|
||||
static unsigned long perf_data_size(struct perf_mmap_data *data)
|
||||
{
|
||||
return data->nr_pages << (PAGE_SHIFT + page_order(data));
|
||||
}
|
||||
|
||||
static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
struct perf_event *event = vma->vm_file->private_data;
|
||||
@@ -2506,8 +2528,6 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
|
||||
{
|
||||
long max_size = perf_data_size(data);
|
||||
|
||||
atomic_set(&data->lock, -1);
|
||||
|
||||
if (event->attr.watermark) {
|
||||
data->watermark = min_t(long, max_size,
|
||||
event->attr.wakeup_watermark);
|
||||
@@ -2580,6 +2600,14 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
long user_extra, extra;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* Don't allow mmap() of inherited per-task counters. This would
|
||||
* create a performance issue due to all children writing to the
|
||||
* same buffer.
|
||||
*/
|
||||
if (event->cpu == -1 && event->attr.inherit)
|
||||
return -EINVAL;
|
||||
|
||||
if (!(vma->vm_flags & VM_SHARED))
|
||||
return -EINVAL;
|
||||
|
||||
@@ -2885,120 +2913,80 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
|
||||
}
|
||||
|
||||
/*
|
||||
* Curious locking construct.
|
||||
*
|
||||
* We need to ensure a later event_id doesn't publish a head when a former
|
||||
* event_id isn't done writing. However since we need to deal with NMIs we
|
||||
* event isn't done writing. However since we need to deal with NMIs we
|
||||
* cannot fully serialize things.
|
||||
*
|
||||
* What we do is serialize between CPUs so we only have to deal with NMI
|
||||
* nesting on a single CPU.
|
||||
*
|
||||
* We only publish the head (and generate a wakeup) when the outer-most
|
||||
* event_id completes.
|
||||
* event completes.
|
||||
*/
|
||||
static void perf_output_lock(struct perf_output_handle *handle)
|
||||
static void perf_output_get_handle(struct perf_output_handle *handle)
|
||||
{
|
||||
struct perf_mmap_data *data = handle->data;
|
||||
int cur, cpu = get_cpu();
|
||||
|
||||
handle->locked = 0;
|
||||
|
||||
for (;;) {
|
||||
cur = atomic_cmpxchg(&data->lock, -1, cpu);
|
||||
if (cur == -1) {
|
||||
handle->locked = 1;
|
||||
break;
|
||||
}
|
||||
if (cur == cpu)
|
||||
break;
|
||||
|
||||
cpu_relax();
|
||||
}
|
||||
preempt_disable();
|
||||
local_inc(&data->nest);
|
||||
handle->wakeup = local_read(&data->wakeup);
|
||||
}
|
||||
|
||||
static void perf_output_unlock(struct perf_output_handle *handle)
|
||||
static void perf_output_put_handle(struct perf_output_handle *handle)
|
||||
{
|
||||
struct perf_mmap_data *data = handle->data;
|
||||
unsigned long head;
|
||||
int cpu;
|
||||
|
||||
data->done_head = data->head;
|
||||
|
||||
if (!handle->locked)
|
||||
goto out;
|
||||
|
||||
again:
|
||||
/*
|
||||
* The xchg implies a full barrier that ensures all writes are done
|
||||
* before we publish the new head, matched by a rmb() in userspace when
|
||||
* reading this position.
|
||||
*/
|
||||
while ((head = atomic_long_xchg(&data->done_head, 0)))
|
||||
data->user_page->data_head = head;
|
||||
head = local_read(&data->head);
|
||||
|
||||
/*
|
||||
* NMI can happen here, which means we can miss a done_head update.
|
||||
* IRQ/NMI can happen here, which means we can miss a head update.
|
||||
*/
|
||||
|
||||
cpu = atomic_xchg(&data->lock, -1);
|
||||
WARN_ON_ONCE(cpu != smp_processor_id());
|
||||
if (!local_dec_and_test(&data->nest))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Therefore we have to validate we did not indeed do so.
|
||||
* Publish the known good head. Rely on the full barrier implied
|
||||
* by atomic_dec_and_test() order the data->head read and this
|
||||
* write.
|
||||
*/
|
||||
if (unlikely(atomic_long_read(&data->done_head))) {
|
||||
/*
|
||||
* Since we had it locked, we can lock it again.
|
||||
*/
|
||||
while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
|
||||
cpu_relax();
|
||||
data->user_page->data_head = head;
|
||||
|
||||
/*
|
||||
* Now check if we missed an update, rely on the (compiler)
|
||||
* barrier in atomic_dec_and_test() to re-read data->head.
|
||||
*/
|
||||
if (unlikely(head != local_read(&data->head))) {
|
||||
local_inc(&data->nest);
|
||||
goto again;
|
||||
}
|
||||
|
||||
if (atomic_xchg(&data->wakeup, 0))
|
||||
if (handle->wakeup != local_read(&data->wakeup))
|
||||
perf_output_wakeup(handle);
|
||||
out:
|
||||
put_cpu();
|
||||
|
||||
out:
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
void perf_output_copy(struct perf_output_handle *handle,
|
||||
__always_inline void perf_output_copy(struct perf_output_handle *handle,
|
||||
const void *buf, unsigned int len)
|
||||
{
|
||||
unsigned int pages_mask;
|
||||
unsigned long offset;
|
||||
unsigned int size;
|
||||
void **pages;
|
||||
|
||||
offset = handle->offset;
|
||||
pages_mask = handle->data->nr_pages - 1;
|
||||
pages = handle->data->data_pages;
|
||||
|
||||
do {
|
||||
unsigned long page_offset;
|
||||
unsigned long page_size;
|
||||
int nr;
|
||||
unsigned long size = min_t(unsigned long, handle->size, len);
|
||||
|
||||
nr = (offset >> PAGE_SHIFT) & pages_mask;
|
||||
page_size = 1UL << (handle->data->data_order + PAGE_SHIFT);
|
||||
page_offset = offset & (page_size - 1);
|
||||
size = min_t(unsigned int, page_size - page_offset, len);
|
||||
memcpy(handle->addr, buf, size);
|
||||
|
||||
memcpy(pages[nr] + page_offset, buf, size);
|
||||
len -= size;
|
||||
handle->addr += size;
|
||||
handle->size -= size;
|
||||
if (!handle->size) {
|
||||
struct perf_mmap_data *data = handle->data;
|
||||
|
||||
len -= size;
|
||||
buf += size;
|
||||
offset += size;
|
||||
handle->page++;
|
||||
handle->page &= data->nr_pages - 1;
|
||||
handle->addr = data->data_pages[handle->page];
|
||||
handle->size = PAGE_SIZE << page_order(data);
|
||||
}
|
||||
} while (len);
|
||||
|
||||
handle->offset = offset;
|
||||
|
||||
/*
|
||||
* Check we didn't copy past our reservation window, taking the
|
||||
* possible unsigned int wrap into account.
|
||||
*/
|
||||
WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0);
|
||||
}
|
||||
|
||||
int perf_output_begin(struct perf_output_handle *handle,
|
||||
@@ -3036,13 +3024,13 @@ int perf_output_begin(struct perf_output_handle *handle,
|
||||
handle->sample = sample;
|
||||
|
||||
if (!data->nr_pages)
|
||||
goto fail;
|
||||
goto out;
|
||||
|
||||
have_lost = atomic_read(&data->lost);
|
||||
have_lost = local_read(&data->lost);
|
||||
if (have_lost)
|
||||
size += sizeof(lost_event);
|
||||
|
||||
perf_output_lock(handle);
|
||||
perf_output_get_handle(handle);
|
||||
|
||||
do {
|
||||
/*
|
||||
@@ -3052,24 +3040,28 @@ int perf_output_begin(struct perf_output_handle *handle,
|
||||
*/
|
||||
tail = ACCESS_ONCE(data->user_page->data_tail);
|
||||
smp_rmb();
|
||||
offset = head = atomic_long_read(&data->head);
|
||||
offset = head = local_read(&data->head);
|
||||
head += size;
|
||||
if (unlikely(!perf_output_space(data, tail, offset, head)))
|
||||
goto fail;
|
||||
} while (atomic_long_cmpxchg(&data->head, offset, head) != offset);
|
||||
} while (local_cmpxchg(&data->head, offset, head) != offset);
|
||||
|
||||
handle->offset = offset;
|
||||
handle->head = head;
|
||||
if (head - local_read(&data->wakeup) > data->watermark)
|
||||
local_add(data->watermark, &data->wakeup);
|
||||
|
||||
if (head - tail > data->watermark)
|
||||
atomic_set(&data->wakeup, 1);
|
||||
handle->page = offset >> (PAGE_SHIFT + page_order(data));
|
||||
handle->page &= data->nr_pages - 1;
|
||||
handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1);
|
||||
handle->addr = data->data_pages[handle->page];
|
||||
handle->addr += handle->size;
|
||||
handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
|
||||
|
||||
if (have_lost) {
|
||||
lost_event.header.type = PERF_RECORD_LOST;
|
||||
lost_event.header.misc = 0;
|
||||
lost_event.header.size = sizeof(lost_event);
|
||||
lost_event.id = event->id;
|
||||
lost_event.lost = atomic_xchg(&data->lost, 0);
|
||||
lost_event.lost = local_xchg(&data->lost, 0);
|
||||
|
||||
perf_output_put(handle, lost_event);
|
||||
}
|
||||
@@ -3077,8 +3069,8 @@ int perf_output_begin(struct perf_output_handle *handle,
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
atomic_inc(&data->lost);
|
||||
perf_output_unlock(handle);
|
||||
local_inc(&data->lost);
|
||||
perf_output_put_handle(handle);
|
||||
out:
|
||||
rcu_read_unlock();
|
||||
|
||||
@@ -3093,14 +3085,14 @@ void perf_output_end(struct perf_output_handle *handle)
|
||||
int wakeup_events = event->attr.wakeup_events;
|
||||
|
||||
if (handle->sample && wakeup_events) {
|
||||
int events = atomic_inc_return(&data->events);
|
||||
int events = local_inc_return(&data->events);
|
||||
if (events >= wakeup_events) {
|
||||
atomic_sub(wakeup_events, &data->events);
|
||||
atomic_set(&data->wakeup, 1);
|
||||
local_sub(wakeup_events, &data->events);
|
||||
local_inc(&data->wakeup);
|
||||
}
|
||||
}
|
||||
|
||||
perf_output_unlock(handle);
|
||||
perf_output_put_handle(handle);
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
@@ -3436,22 +3428,13 @@ static void perf_event_task_output(struct perf_event *event,
|
||||
{
|
||||
struct perf_output_handle handle;
|
||||
struct task_struct *task = task_event->task;
|
||||
unsigned long flags;
|
||||
int size, ret;
|
||||
|
||||
/*
|
||||
* If this CPU attempts to acquire an rq lock held by a CPU spinning
|
||||
* in perf_output_lock() from interrupt context, it's game over.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
|
||||
size = task_event->event_id.header.size;
|
||||
ret = perf_output_begin(&handle, event, size, 0, 0);
|
||||
|
||||
if (ret) {
|
||||
local_irq_restore(flags);
|
||||
if (ret)
|
||||
return;
|
||||
}
|
||||
|
||||
task_event->event_id.pid = perf_event_pid(event, task);
|
||||
task_event->event_id.ppid = perf_event_pid(event, current);
|
||||
@@ -3462,7 +3445,6 @@ static void perf_event_task_output(struct perf_event *event,
|
||||
perf_output_put(&handle, task_event->event_id);
|
||||
|
||||
perf_output_end(&handle);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static int perf_event_task_match(struct perf_event *event)
|
||||
@@ -4020,9 +4002,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
|
||||
perf_swevent_overflow(event, 0, nmi, data, regs);
|
||||
}
|
||||
|
||||
static int perf_tp_event_match(struct perf_event *event,
|
||||
struct perf_sample_data *data);
|
||||
|
||||
static int perf_exclude_event(struct perf_event *event,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
@@ -4052,10 +4031,6 @@ static int perf_swevent_match(struct perf_event *event,
|
||||
if (perf_exclude_event(event, regs))
|
||||
return 0;
|
||||
|
||||
if (event->attr.type == PERF_TYPE_TRACEPOINT &&
|
||||
!perf_tp_event_match(event, data))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -4066,19 +4041,46 @@ static inline u64 swevent_hash(u64 type, u32 event_id)
|
||||
return hash_64(val, SWEVENT_HLIST_BITS);
|
||||
}
|
||||
|
||||
static struct hlist_head *
|
||||
find_swevent_head(struct perf_cpu_context *ctx, u64 type, u32 event_id)
|
||||
static inline struct hlist_head *
|
||||
__find_swevent_head(struct swevent_hlist *hlist, u64 type, u32 event_id)
|
||||
{
|
||||
u64 hash;
|
||||
struct swevent_hlist *hlist;
|
||||
u64 hash = swevent_hash(type, event_id);
|
||||
|
||||
hash = swevent_hash(type, event_id);
|
||||
return &hlist->heads[hash];
|
||||
}
|
||||
|
||||
/* For the read side: events when they trigger */
|
||||
static inline struct hlist_head *
|
||||
find_swevent_head_rcu(struct perf_cpu_context *ctx, u64 type, u32 event_id)
|
||||
{
|
||||
struct swevent_hlist *hlist;
|
||||
|
||||
hlist = rcu_dereference(ctx->swevent_hlist);
|
||||
if (!hlist)
|
||||
return NULL;
|
||||
|
||||
return &hlist->heads[hash];
|
||||
return __find_swevent_head(hlist, type, event_id);
|
||||
}
|
||||
|
||||
/* For the event head insertion and removal in the hlist */
|
||||
static inline struct hlist_head *
|
||||
find_swevent_head(struct perf_cpu_context *ctx, struct perf_event *event)
|
||||
{
|
||||
struct swevent_hlist *hlist;
|
||||
u32 event_id = event->attr.config;
|
||||
u64 type = event->attr.type;
|
||||
|
||||
/*
|
||||
* Event scheduling is always serialized against hlist allocation
|
||||
* and release. Which makes the protected version suitable here.
|
||||
* The context lock guarantees that.
|
||||
*/
|
||||
hlist = rcu_dereference_protected(ctx->swevent_hlist,
|
||||
lockdep_is_held(&event->ctx->lock));
|
||||
if (!hlist)
|
||||
return NULL;
|
||||
|
||||
return __find_swevent_head(hlist, type, event_id);
|
||||
}
|
||||
|
||||
static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
|
||||
@@ -4095,7 +4097,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
head = find_swevent_head(cpuctx, type, event_id);
|
||||
head = find_swevent_head_rcu(cpuctx, type, event_id);
|
||||
|
||||
if (!head)
|
||||
goto end;
|
||||
@@ -4110,7 +4112,7 @@ end:
|
||||
|
||||
int perf_swevent_get_recursion_context(void)
|
||||
{
|
||||
struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
|
||||
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
|
||||
int rctx;
|
||||
|
||||
if (in_nmi())
|
||||
@@ -4122,10 +4124,8 @@ int perf_swevent_get_recursion_context(void)
|
||||
else
|
||||
rctx = 0;
|
||||
|
||||
if (cpuctx->recursion[rctx]) {
|
||||
put_cpu_var(perf_cpu_context);
|
||||
if (cpuctx->recursion[rctx])
|
||||
return -1;
|
||||
}
|
||||
|
||||
cpuctx->recursion[rctx]++;
|
||||
barrier();
|
||||
@@ -4139,7 +4139,6 @@ void perf_swevent_put_recursion_context(int rctx)
|
||||
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
|
||||
barrier();
|
||||
cpuctx->recursion[rctx]--;
|
||||
put_cpu_var(perf_cpu_context);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
|
||||
|
||||
@@ -4150,6 +4149,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
|
||||
struct perf_sample_data data;
|
||||
int rctx;
|
||||
|
||||
preempt_disable_notrace();
|
||||
rctx = perf_swevent_get_recursion_context();
|
||||
if (rctx < 0)
|
||||
return;
|
||||
@@ -4159,6 +4159,7 @@ void __perf_sw_event(u32 event_id, u64 nr, int nmi,
|
||||
do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
|
||||
|
||||
perf_swevent_put_recursion_context(rctx);
|
||||
preempt_enable_notrace();
|
||||
}
|
||||
|
||||
static void perf_swevent_read(struct perf_event *event)
|
||||
@@ -4178,7 +4179,7 @@ static int perf_swevent_enable(struct perf_event *event)
|
||||
perf_swevent_set_period(event);
|
||||
}
|
||||
|
||||
head = find_swevent_head(cpuctx, event->attr.type, event->attr.config);
|
||||
head = find_swevent_head(cpuctx, event);
|
||||
if (WARN_ON_ONCE(!head))
|
||||
return -EINVAL;
|
||||
|
||||
@@ -4366,6 +4367,14 @@ static const struct pmu perf_ops_task_clock = {
|
||||
.read = task_clock_perf_event_read,
|
||||
};
|
||||
|
||||
/* Deref the hlist from the update side */
|
||||
static inline struct swevent_hlist *
|
||||
swevent_hlist_deref(struct perf_cpu_context *cpuctx)
|
||||
{
|
||||
return rcu_dereference_protected(cpuctx->swevent_hlist,
|
||||
lockdep_is_held(&cpuctx->hlist_mutex));
|
||||
}
|
||||
|
||||
static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
|
||||
{
|
||||
struct swevent_hlist *hlist;
|
||||
@@ -4376,12 +4385,11 @@ static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
|
||||
|
||||
static void swevent_hlist_release(struct perf_cpu_context *cpuctx)
|
||||
{
|
||||
struct swevent_hlist *hlist;
|
||||
struct swevent_hlist *hlist = swevent_hlist_deref(cpuctx);
|
||||
|
||||
if (!cpuctx->swevent_hlist)
|
||||
if (!hlist)
|
||||
return;
|
||||
|
||||
hlist = cpuctx->swevent_hlist;
|
||||
rcu_assign_pointer(cpuctx->swevent_hlist, NULL);
|
||||
call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
|
||||
}
|
||||
@@ -4418,7 +4426,7 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
|
||||
|
||||
mutex_lock(&cpuctx->hlist_mutex);
|
||||
|
||||
if (!cpuctx->swevent_hlist && cpu_online(cpu)) {
|
||||
if (!swevent_hlist_deref(cpuctx) && cpu_online(cpu)) {
|
||||
struct swevent_hlist *hlist;
|
||||
|
||||
hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
|
||||
@@ -4467,25 +4475,14 @@ static int swevent_hlist_get(struct perf_event *event)
|
||||
|
||||
#ifdef CONFIG_EVENT_TRACING
|
||||
|
||||
void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
|
||||
int entry_size, struct pt_regs *regs)
|
||||
{
|
||||
struct perf_sample_data data;
|
||||
struct perf_raw_record raw = {
|
||||
.size = entry_size,
|
||||
.data = record,
|
||||
};
|
||||
static const struct pmu perf_ops_tracepoint = {
|
||||
.enable = perf_trace_enable,
|
||||
.disable = perf_trace_disable,
|
||||
.read = perf_swevent_read,
|
||||
.unthrottle = perf_swevent_unthrottle,
|
||||
};
|
||||
|
||||
perf_sample_data_init(&data, addr);
|
||||
data.raw = &raw;
|
||||
|
||||
/* Trace events already protected against recursion */
|
||||
do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
|
||||
&data, regs);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_tp_event);
|
||||
|
||||
static int perf_tp_event_match(struct perf_event *event,
|
||||
static int perf_tp_filter_match(struct perf_event *event,
|
||||
struct perf_sample_data *data)
|
||||
{
|
||||
void *record = data->raw->data;
|
||||
@@ -4495,10 +4492,49 @@ static int perf_tp_event_match(struct perf_event *event,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int perf_tp_event_match(struct perf_event *event,
|
||||
struct perf_sample_data *data,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
/*
|
||||
* All tracepoints are from kernel-space.
|
||||
*/
|
||||
if (event->attr.exclude_kernel)
|
||||
return 0;
|
||||
|
||||
if (!perf_tp_filter_match(event, data))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
|
||||
struct pt_regs *regs, struct hlist_head *head)
|
||||
{
|
||||
struct perf_sample_data data;
|
||||
struct perf_event *event;
|
||||
struct hlist_node *node;
|
||||
|
||||
struct perf_raw_record raw = {
|
||||
.size = entry_size,
|
||||
.data = record,
|
||||
};
|
||||
|
||||
perf_sample_data_init(&data, addr);
|
||||
data.raw = &raw;
|
||||
|
||||
rcu_read_lock();
|
||||
hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
|
||||
if (perf_tp_event_match(event, &data, regs))
|
||||
perf_swevent_add(event, count, 1, &data, regs);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_tp_event);
|
||||
|
||||
static void tp_perf_event_destroy(struct perf_event *event)
|
||||
{
|
||||
perf_trace_disable(event->attr.config);
|
||||
swevent_hlist_put(event);
|
||||
perf_trace_destroy(event);
|
||||
}
|
||||
|
||||
static const struct pmu *tp_perf_event_init(struct perf_event *event)
|
||||
@@ -4514,17 +4550,13 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
|
||||
!capable(CAP_SYS_ADMIN))
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
if (perf_trace_enable(event->attr.config))
|
||||
err = perf_trace_init(event);
|
||||
if (err)
|
||||
return NULL;
|
||||
|
||||
event->destroy = tp_perf_event_destroy;
|
||||
err = swevent_hlist_get(event);
|
||||
if (err) {
|
||||
perf_trace_disable(event->attr.config);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
return &perf_ops_generic;
|
||||
return &perf_ops_tracepoint;
|
||||
}
|
||||
|
||||
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
|
||||
@@ -4552,12 +4584,6 @@ static void perf_event_free_filter(struct perf_event *event)
|
||||
|
||||
#else
|
||||
|
||||
static int perf_tp_event_match(struct perf_event *event,
|
||||
struct perf_sample_data *data)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
static const struct pmu *tp_perf_event_init(struct perf_event *event)
|
||||
{
|
||||
return NULL;
|
||||
@@ -4894,6 +4920,13 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
|
||||
int fput_needed = 0;
|
||||
int ret = -EINVAL;
|
||||
|
||||
/*
|
||||
* Don't allow output of inherited per-task events. This would
|
||||
* create performance issues due to cross cpu access.
|
||||
*/
|
||||
if (event->cpu == -1 && event->attr.inherit)
|
||||
return -EINVAL;
|
||||
|
||||
if (!output_fd)
|
||||
goto set;
|
||||
|
||||
@@ -4914,6 +4947,18 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
|
||||
if (event->data)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Don't allow cross-cpu buffers
|
||||
*/
|
||||
if (output_event->cpu != event->cpu)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* If its not a per-cpu buffer, it must be the same task.
|
||||
*/
|
||||
if (output_event->cpu == -1 && output_event->ctx != event->ctx)
|
||||
goto out;
|
||||
|
||||
atomic_long_inc(&output_file->f_count);
|
||||
|
||||
set:
|
||||
|
||||
+3
-1
@@ -127,8 +127,10 @@ int __ref profile_init(void)
|
||||
return 0;
|
||||
|
||||
prof_buffer = vmalloc(buffer_bytes);
|
||||
if (prof_buffer)
|
||||
if (prof_buffer) {
|
||||
memset(prof_buffer, 0, buffer_bytes);
|
||||
return 0;
|
||||
}
|
||||
|
||||
free_cpumask_var(prof_cpu_mask);
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/highmem.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/smp_lock.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/signal.h>
|
||||
@@ -665,10 +664,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data)
|
||||
struct task_struct *child;
|
||||
long ret;
|
||||
|
||||
/*
|
||||
* This lock_kernel fixes a subtle race with suid exec
|
||||
*/
|
||||
lock_kernel();
|
||||
if (request == PTRACE_TRACEME) {
|
||||
ret = ptrace_traceme();
|
||||
if (!ret)
|
||||
@@ -702,7 +697,6 @@ SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data)
|
||||
out_put_task_struct:
|
||||
put_task_struct(child);
|
||||
out:
|
||||
unlock_kernel();
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -812,10 +806,6 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
|
||||
struct task_struct *child;
|
||||
long ret;
|
||||
|
||||
/*
|
||||
* This lock_kernel fixes a subtle race with suid exec
|
||||
*/
|
||||
lock_kernel();
|
||||
if (request == PTRACE_TRACEME) {
|
||||
ret = ptrace_traceme();
|
||||
goto out;
|
||||
@@ -845,7 +835,6 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid,
|
||||
out_put_task_struct:
|
||||
put_task_struct(child);
|
||||
out:
|
||||
unlock_kernel();
|
||||
return ret;
|
||||
}
|
||||
#endif /* CONFIG_COMPAT */
|
||||
|
||||
+11
-19
@@ -44,7 +44,6 @@
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/hardirq.h>
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
@@ -64,9 +63,6 @@ struct lockdep_map rcu_sched_lock_map =
|
||||
EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
|
||||
#endif
|
||||
|
||||
int rcu_scheduler_active __read_mostly;
|
||||
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
|
||||
int debug_lockdep_rcu_enabled(void)
|
||||
@@ -96,21 +92,6 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
|
||||
|
||||
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
|
||||
|
||||
/*
|
||||
* This function is invoked towards the end of the scheduler's initialization
|
||||
* process. Before this is called, the idle task might contain
|
||||
* RCU read-side critical sections (during which time, this idle
|
||||
* task is booting the system). After this function is called, the
|
||||
* idle tasks are prohibited from containing RCU read-side critical
|
||||
* sections.
|
||||
*/
|
||||
void rcu_scheduler_starting(void)
|
||||
{
|
||||
WARN_ON(num_online_cpus() != 1);
|
||||
WARN_ON(nr_context_switches() > 0);
|
||||
rcu_scheduler_active = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Awaken the corresponding synchronize_rcu() instance now that a
|
||||
* grace period has elapsed.
|
||||
@@ -122,3 +103,14 @@ void wakeme_after_rcu(struct rcu_head *head)
|
||||
rcu = container_of(head, struct rcu_synchronize, head);
|
||||
complete(&rcu->completion);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
/*
|
||||
* wrapper function to avoid #include problems.
|
||||
*/
|
||||
int rcu_my_thread_group_empty(void)
|
||||
{
|
||||
return thread_group_empty(current);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty);
|
||||
#endif /* #ifdef CONFIG_PROVE_RCU */
|
||||
|
||||
+22
-13
@@ -44,9 +44,9 @@ struct rcu_ctrlblk {
|
||||
};
|
||||
|
||||
/* Definition for rcupdate control block. */
|
||||
static struct rcu_ctrlblk rcu_ctrlblk = {
|
||||
.donetail = &rcu_ctrlblk.rcucblist,
|
||||
.curtail = &rcu_ctrlblk.rcucblist,
|
||||
static struct rcu_ctrlblk rcu_sched_ctrlblk = {
|
||||
.donetail = &rcu_sched_ctrlblk.rcucblist,
|
||||
.curtail = &rcu_sched_ctrlblk.rcucblist,
|
||||
};
|
||||
|
||||
static struct rcu_ctrlblk rcu_bh_ctrlblk = {
|
||||
@@ -54,6 +54,11 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = {
|
||||
.curtail = &rcu_bh_ctrlblk.rcucblist,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
int rcu_scheduler_active __read_mostly;
|
||||
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
|
||||
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
|
||||
|
||||
#ifdef CONFIG_NO_HZ
|
||||
|
||||
static long rcu_dynticks_nesting = 1;
|
||||
@@ -108,7 +113,8 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
|
||||
*/
|
||||
void rcu_sched_qs(int cpu)
|
||||
{
|
||||
if (rcu_qsctr_help(&rcu_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk))
|
||||
if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
|
||||
rcu_qsctr_help(&rcu_bh_ctrlblk))
|
||||
raise_softirq(RCU_SOFTIRQ);
|
||||
}
|
||||
|
||||
@@ -173,7 +179,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
|
||||
*/
|
||||
static void rcu_process_callbacks(struct softirq_action *unused)
|
||||
{
|
||||
__rcu_process_callbacks(&rcu_ctrlblk);
|
||||
__rcu_process_callbacks(&rcu_sched_ctrlblk);
|
||||
__rcu_process_callbacks(&rcu_bh_ctrlblk);
|
||||
}
|
||||
|
||||
@@ -187,7 +193,8 @@ static void rcu_process_callbacks(struct softirq_action *unused)
|
||||
*
|
||||
* Cool, huh? (Due to Josh Triplett.)
|
||||
*
|
||||
* But we want to make this a static inline later.
|
||||
* But we want to make this a static inline later. The cond_resched()
|
||||
* currently makes this problematic.
|
||||
*/
|
||||
void synchronize_sched(void)
|
||||
{
|
||||
@@ -195,12 +202,6 @@ void synchronize_sched(void)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(synchronize_sched);
|
||||
|
||||
void synchronize_rcu_bh(void)
|
||||
{
|
||||
synchronize_sched();
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
|
||||
|
||||
/*
|
||||
* Helper function for call_rcu() and call_rcu_bh().
|
||||
*/
|
||||
@@ -226,7 +227,7 @@ static void __call_rcu(struct rcu_head *head,
|
||||
*/
|
||||
void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
|
||||
{
|
||||
__call_rcu(head, func, &rcu_ctrlblk);
|
||||
__call_rcu(head, func, &rcu_sched_ctrlblk);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(call_rcu);
|
||||
|
||||
@@ -244,11 +245,13 @@ void rcu_barrier(void)
|
||||
{
|
||||
struct rcu_synchronize rcu;
|
||||
|
||||
init_rcu_head_on_stack(&rcu.head);
|
||||
init_completion(&rcu.completion);
|
||||
/* Will wake me after RCU finished. */
|
||||
call_rcu(&rcu.head, wakeme_after_rcu);
|
||||
/* Wait for it. */
|
||||
wait_for_completion(&rcu.completion);
|
||||
destroy_rcu_head_on_stack(&rcu.head);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier);
|
||||
|
||||
@@ -256,11 +259,13 @@ void rcu_barrier_bh(void)
|
||||
{
|
||||
struct rcu_synchronize rcu;
|
||||
|
||||
init_rcu_head_on_stack(&rcu.head);
|
||||
init_completion(&rcu.completion);
|
||||
/* Will wake me after RCU finished. */
|
||||
call_rcu_bh(&rcu.head, wakeme_after_rcu);
|
||||
/* Wait for it. */
|
||||
wait_for_completion(&rcu.completion);
|
||||
destroy_rcu_head_on_stack(&rcu.head);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier_bh);
|
||||
|
||||
@@ -268,11 +273,13 @@ void rcu_barrier_sched(void)
|
||||
{
|
||||
struct rcu_synchronize rcu;
|
||||
|
||||
init_rcu_head_on_stack(&rcu.head);
|
||||
init_completion(&rcu.completion);
|
||||
/* Will wake me after RCU finished. */
|
||||
call_rcu_sched(&rcu.head, wakeme_after_rcu);
|
||||
/* Wait for it. */
|
||||
wait_for_completion(&rcu.completion);
|
||||
destroy_rcu_head_on_stack(&rcu.head);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(rcu_barrier_sched);
|
||||
|
||||
@@ -280,3 +287,5 @@ void __init rcu_init(void)
|
||||
{
|
||||
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
|
||||
}
|
||||
|
||||
#include "rcutiny_plugin.h"
|
||||
|
||||
@@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Read-Copy Update mechanism for mutual exclusion (tree-based version)
|
||||
* Internal non-public definitions that provide either classic
|
||||
* or preemptable semantics.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*
|
||||
* Copyright IBM Corporation, 2009
|
||||
*
|
||||
* Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
||||
|
||||
#include <linux/kernel_stat.h>
|
||||
|
||||
/*
|
||||
* During boot, we forgive RCU lockdep issues. After this function is
|
||||
* invoked, we start taking RCU lockdep issues seriously.
|
||||
*/
|
||||
void rcu_scheduler_starting(void)
|
||||
{
|
||||
WARN_ON(nr_context_switches() > 0);
|
||||
rcu_scheduler_active = 1;
|
||||
}
|
||||
|
||||
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
|
||||
@@ -464,9 +464,11 @@ static void rcu_bh_torture_synchronize(void)
|
||||
{
|
||||
struct rcu_bh_torture_synchronize rcu;
|
||||
|
||||
init_rcu_head_on_stack(&rcu.head);
|
||||
init_completion(&rcu.completion);
|
||||
call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb);
|
||||
wait_for_completion(&rcu.completion);
|
||||
destroy_rcu_head_on_stack(&rcu.head);
|
||||
}
|
||||
|
||||
static struct rcu_torture_ops rcu_bh_ops = {
|
||||
|
||||
+96
-35
@@ -46,6 +46,7 @@
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
|
||||
#include "rcutree.h"
|
||||
|
||||
@@ -53,8 +54,8 @@
|
||||
|
||||
static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
|
||||
|
||||
#define RCU_STATE_INITIALIZER(name) { \
|
||||
.level = { &name.node[0] }, \
|
||||
#define RCU_STATE_INITIALIZER(structname) { \
|
||||
.level = { &structname.node[0] }, \
|
||||
.levelcnt = { \
|
||||
NUM_RCU_LVL_0, /* root of hierarchy. */ \
|
||||
NUM_RCU_LVL_1, \
|
||||
@@ -65,13 +66,14 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
|
||||
.signaled = RCU_GP_IDLE, \
|
||||
.gpnum = -300, \
|
||||
.completed = -300, \
|
||||
.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&name.onofflock), \
|
||||
.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \
|
||||
.orphan_cbs_list = NULL, \
|
||||
.orphan_cbs_tail = &name.orphan_cbs_list, \
|
||||
.orphan_cbs_tail = &structname.orphan_cbs_list, \
|
||||
.orphan_qlen = 0, \
|
||||
.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&name.fqslock), \
|
||||
.fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \
|
||||
.n_force_qs = 0, \
|
||||
.n_force_qs_ngp = 0, \
|
||||
.name = #structname, \
|
||||
}
|
||||
|
||||
struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state);
|
||||
@@ -80,6 +82,9 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
|
||||
struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
|
||||
DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
|
||||
|
||||
int rcu_scheduler_active __read_mostly;
|
||||
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
|
||||
|
||||
/*
|
||||
* Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
|
||||
* permit this function to be invoked without holding the root rcu_node
|
||||
@@ -97,25 +102,32 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
|
||||
*/
|
||||
void rcu_sched_qs(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
|
||||
|
||||
rdp = &per_cpu(rcu_sched_data, cpu);
|
||||
rdp->passed_quiesc_completed = rdp->gpnum - 1;
|
||||
barrier();
|
||||
rdp->passed_quiesc = 1;
|
||||
rcu_preempt_note_context_switch(cpu);
|
||||
}
|
||||
|
||||
void rcu_bh_qs(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp;
|
||||
struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
|
||||
|
||||
rdp = &per_cpu(rcu_bh_data, cpu);
|
||||
rdp->passed_quiesc_completed = rdp->gpnum - 1;
|
||||
barrier();
|
||||
rdp->passed_quiesc = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note a context switch. This is a quiescent state for RCU-sched,
|
||||
* and requires special handling for preemptible RCU.
|
||||
*/
|
||||
void rcu_note_context_switch(int cpu)
|
||||
{
|
||||
rcu_sched_qs(cpu);
|
||||
rcu_preempt_note_context_switch(cpu);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_HZ
|
||||
DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
|
||||
.dynticks_nesting = 1,
|
||||
@@ -438,6 +450,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
|
||||
|
||||
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
|
||||
|
||||
int rcu_cpu_stall_panicking __read_mostly;
|
||||
|
||||
static void record_gp_stall_check_time(struct rcu_state *rsp)
|
||||
{
|
||||
rsp->gp_start = jiffies;
|
||||
@@ -470,7 +484,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
|
||||
|
||||
/* OK, time to rat on our buddy... */
|
||||
|
||||
printk(KERN_ERR "INFO: RCU detected CPU stalls:");
|
||||
printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {",
|
||||
rsp->name);
|
||||
rcu_for_each_leaf_node(rsp, rnp) {
|
||||
raw_spin_lock_irqsave(&rnp->lock, flags);
|
||||
rcu_print_task_stall(rnp);
|
||||
@@ -481,7 +496,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
|
||||
if (rnp->qsmask & (1UL << cpu))
|
||||
printk(" %d", rnp->grplo + cpu);
|
||||
}
|
||||
printk(" (detected by %d, t=%ld jiffies)\n",
|
||||
printk("} (detected by %d, t=%ld jiffies)\n",
|
||||
smp_processor_id(), (long)(jiffies - rsp->gp_start));
|
||||
trigger_all_cpu_backtrace();
|
||||
|
||||
@@ -497,8 +512,8 @@ static void print_cpu_stall(struct rcu_state *rsp)
|
||||
unsigned long flags;
|
||||
struct rcu_node *rnp = rcu_get_root(rsp);
|
||||
|
||||
printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n",
|
||||
smp_processor_id(), jiffies - rsp->gp_start);
|
||||
printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n",
|
||||
rsp->name, smp_processor_id(), jiffies - rsp->gp_start);
|
||||
trigger_all_cpu_backtrace();
|
||||
|
||||
raw_spin_lock_irqsave(&rnp->lock, flags);
|
||||
@@ -515,6 +530,8 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
long delta;
|
||||
struct rcu_node *rnp;
|
||||
|
||||
if (rcu_cpu_stall_panicking)
|
||||
return;
|
||||
delta = jiffies - rsp->jiffies_stall;
|
||||
rnp = rdp->mynode;
|
||||
if ((rnp->qsmask & rdp->grpmask) && delta >= 0) {
|
||||
@@ -529,6 +546,21 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
}
|
||||
}
|
||||
|
||||
static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
|
||||
{
|
||||
rcu_cpu_stall_panicking = 1;
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
static struct notifier_block rcu_panic_block = {
|
||||
.notifier_call = rcu_panic,
|
||||
};
|
||||
|
||||
static void __init check_cpu_stall_init(void)
|
||||
{
|
||||
atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block);
|
||||
}
|
||||
|
||||
#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
|
||||
|
||||
static void record_gp_stall_check_time(struct rcu_state *rsp)
|
||||
@@ -539,6 +571,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
{
|
||||
}
|
||||
|
||||
static void __init check_cpu_stall_init(void)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
|
||||
|
||||
/*
|
||||
@@ -1125,8 +1161,6 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
*/
|
||||
void rcu_check_callbacks(int cpu, int user)
|
||||
{
|
||||
if (!rcu_pending(cpu))
|
||||
return; /* if nothing for RCU to do. */
|
||||
if (user ||
|
||||
(idle_cpu(cpu) && rcu_scheduler_active &&
|
||||
!in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
|
||||
@@ -1158,7 +1192,8 @@ void rcu_check_callbacks(int cpu, int user)
|
||||
rcu_bh_qs(cpu);
|
||||
}
|
||||
rcu_preempt_check_callbacks(cpu);
|
||||
raise_softirq(RCU_SOFTIRQ);
|
||||
if (rcu_pending(cpu))
|
||||
raise_softirq(RCU_SOFTIRQ);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
@@ -1236,11 +1271,11 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
|
||||
break; /* grace period idle or initializing, ignore. */
|
||||
|
||||
case RCU_SAVE_DYNTICK:
|
||||
|
||||
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
|
||||
if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
|
||||
break; /* So gcc recognizes the dead code. */
|
||||
|
||||
raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
|
||||
|
||||
/* Record dyntick-idle state. */
|
||||
force_qs_rnp(rsp, dyntick_save_progress_counter);
|
||||
raw_spin_lock(&rnp->lock); /* irqs already disabled */
|
||||
@@ -1449,11 +1484,13 @@ void synchronize_sched(void)
|
||||
if (rcu_blocking_is_gp())
|
||||
return;
|
||||
|
||||
init_rcu_head_on_stack(&rcu.head);
|
||||
init_completion(&rcu.completion);
|
||||
/* Will wake me after RCU finished. */
|
||||
call_rcu_sched(&rcu.head, wakeme_after_rcu);
|
||||
/* Wait for it. */
|
||||
wait_for_completion(&rcu.completion);
|
||||
destroy_rcu_head_on_stack(&rcu.head);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(synchronize_sched);
|
||||
|
||||
@@ -1473,11 +1510,13 @@ void synchronize_rcu_bh(void)
|
||||
if (rcu_blocking_is_gp())
|
||||
return;
|
||||
|
||||
init_rcu_head_on_stack(&rcu.head);
|
||||
init_completion(&rcu.completion);
|
||||
/* Will wake me after RCU finished. */
|
||||
call_rcu_bh(&rcu.head, wakeme_after_rcu);
|
||||
/* Wait for it. */
|
||||
wait_for_completion(&rcu.completion);
|
||||
destroy_rcu_head_on_stack(&rcu.head);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
|
||||
|
||||
@@ -1498,8 +1537,20 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
|
||||
check_cpu_stall(rsp, rdp);
|
||||
|
||||
/* Is the RCU core waiting for a quiescent state from this CPU? */
|
||||
if (rdp->qs_pending) {
|
||||
if (rdp->qs_pending && !rdp->passed_quiesc) {
|
||||
|
||||
/*
|
||||
* If force_quiescent_state() coming soon and this CPU
|
||||
* needs a quiescent state, and this is either RCU-sched
|
||||
* or RCU-bh, force a local reschedule.
|
||||
*/
|
||||
rdp->n_rp_qs_pending++;
|
||||
if (!rdp->preemptable &&
|
||||
ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1,
|
||||
jiffies))
|
||||
set_need_resched();
|
||||
} else if (rdp->qs_pending && rdp->passed_quiesc) {
|
||||
rdp->n_rp_report_qs++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -1766,6 +1817,21 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is invoked towards the end of the scheduler's initialization
|
||||
* process. Before this is called, the idle task might contain
|
||||
* RCU read-side critical sections (during which time, this idle
|
||||
* task is booting the system). After this function is called, the
|
||||
* idle tasks are prohibited from containing RCU read-side critical
|
||||
* sections. This function also enables RCU lockdep checking.
|
||||
*/
|
||||
void rcu_scheduler_starting(void)
|
||||
{
|
||||
WARN_ON(num_online_cpus() != 1);
|
||||
WARN_ON(nr_context_switches() > 0);
|
||||
rcu_scheduler_active = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the per-level fanout, either using the exact fanout specified
|
||||
* or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
|
||||
@@ -1849,6 +1915,14 @@ static void __init rcu_init_one(struct rcu_state *rsp)
|
||||
INIT_LIST_HEAD(&rnp->blocked_tasks[3]);
|
||||
}
|
||||
}
|
||||
|
||||
rnp = rsp->level[NUM_RCU_LVLS - 1];
|
||||
for_each_possible_cpu(i) {
|
||||
while (i > rnp->grphi)
|
||||
rnp++;
|
||||
rsp->rda[i]->mynode = rnp;
|
||||
rcu_boot_init_percpu_data(i, rsp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1859,19 +1933,11 @@ static void __init rcu_init_one(struct rcu_state *rsp)
|
||||
#define RCU_INIT_FLAVOR(rsp, rcu_data) \
|
||||
do { \
|
||||
int i; \
|
||||
int j; \
|
||||
struct rcu_node *rnp; \
|
||||
\
|
||||
rcu_init_one(rsp); \
|
||||
rnp = (rsp)->level[NUM_RCU_LVLS - 1]; \
|
||||
j = 0; \
|
||||
for_each_possible_cpu(i) { \
|
||||
if (i > rnp[j].grphi) \
|
||||
j++; \
|
||||
per_cpu(rcu_data, i).mynode = &rnp[j]; \
|
||||
(rsp)->rda[i] = &per_cpu(rcu_data, i); \
|
||||
rcu_boot_init_percpu_data(i, rsp); \
|
||||
} \
|
||||
rcu_init_one(rsp); \
|
||||
} while (0)
|
||||
|
||||
void __init rcu_init(void)
|
||||
@@ -1879,12 +1945,6 @@ void __init rcu_init(void)
|
||||
int cpu;
|
||||
|
||||
rcu_bootup_announce();
|
||||
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
|
||||
printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
|
||||
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
|
||||
#if NUM_RCU_LVL_4 != 0
|
||||
printk(KERN_INFO "Experimental four-level hierarchy is enabled.\n");
|
||||
#endif /* #if NUM_RCU_LVL_4 != 0 */
|
||||
RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data);
|
||||
RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data);
|
||||
__rcu_init_preempt();
|
||||
@@ -1898,6 +1958,7 @@ void __init rcu_init(void)
|
||||
cpu_notifier(rcu_cpu_notify, 0);
|
||||
for_each_online_cpu(cpu)
|
||||
rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
|
||||
check_cpu_stall_init();
|
||||
}
|
||||
|
||||
#include "rcutree_plugin.h"
|
||||
|
||||
@@ -223,6 +223,7 @@ struct rcu_data {
|
||||
/* 5) __rcu_pending() statistics. */
|
||||
unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */
|
||||
unsigned long n_rp_qs_pending;
|
||||
unsigned long n_rp_report_qs;
|
||||
unsigned long n_rp_cb_ready;
|
||||
unsigned long n_rp_cpu_needs_gp;
|
||||
unsigned long n_rp_gp_completed;
|
||||
@@ -326,6 +327,7 @@ struct rcu_state {
|
||||
unsigned long jiffies_stall; /* Time at which to check */
|
||||
/* for CPU stalls. */
|
||||
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
|
||||
char *name; /* Name of structure. */
|
||||
};
|
||||
|
||||
/* Return values for rcu_preempt_offline_tasks(). */
|
||||
|
||||
+61
-8
@@ -26,6 +26,45 @@
|
||||
|
||||
#include <linux/delay.h>
|
||||
|
||||
/*
|
||||
* Check the RCU kernel configuration parameters and print informative
|
||||
* messages about anything out of the ordinary. If you like #ifdef, you
|
||||
* will love this function.
|
||||
*/
|
||||
static void __init rcu_bootup_announce_oddness(void)
|
||||
{
|
||||
#ifdef CONFIG_RCU_TRACE
|
||||
printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n");
|
||||
#endif
|
||||
#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
|
||||
printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
|
||||
CONFIG_RCU_FANOUT);
|
||||
#endif
|
||||
#ifdef CONFIG_RCU_FANOUT_EXACT
|
||||
printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n");
|
||||
#endif
|
||||
#ifdef CONFIG_RCU_FAST_NO_HZ
|
||||
printk(KERN_INFO
|
||||
"\tRCU dyntick-idle grace-period acceleration is enabled.\n");
|
||||
#endif
|
||||
#ifdef CONFIG_PROVE_RCU
|
||||
printk(KERN_INFO "\tRCU lockdep checking is enabled.\n");
|
||||
#endif
|
||||
#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
|
||||
printk(KERN_INFO "\tRCU torture testing starts during boot.\n");
|
||||
#endif
|
||||
#ifndef CONFIG_RCU_CPU_STALL_DETECTOR
|
||||
printk(KERN_INFO
|
||||
"\tRCU-based detection of stalled CPUs is disabled.\n");
|
||||
#endif
|
||||
#ifndef CONFIG_RCU_CPU_STALL_VERBOSE
|
||||
printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
|
||||
#endif
|
||||
#if NUM_RCU_LVL_4 != 0
|
||||
printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TREE_PREEMPT_RCU
|
||||
|
||||
struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
|
||||
@@ -38,8 +77,8 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp);
|
||||
*/
|
||||
static void __init rcu_bootup_announce(void)
|
||||
{
|
||||
printk(KERN_INFO
|
||||
"Experimental preemptable hierarchical RCU implementation.\n");
|
||||
printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n");
|
||||
rcu_bootup_announce_oddness();
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -75,13 +114,19 @@ EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
|
||||
* that this just means that the task currently running on the CPU is
|
||||
* not in a quiescent state. There might be any number of tasks blocked
|
||||
* while in an RCU read-side critical section.
|
||||
*
|
||||
* Unlike the other rcu_*_qs() functions, callers to this function
|
||||
* must disable irqs in order to protect the assignment to
|
||||
* ->rcu_read_unlock_special.
|
||||
*/
|
||||
static void rcu_preempt_qs(int cpu)
|
||||
{
|
||||
struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
|
||||
|
||||
rdp->passed_quiesc_completed = rdp->gpnum - 1;
|
||||
barrier();
|
||||
rdp->passed_quiesc = 1;
|
||||
current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -144,9 +189,8 @@ static void rcu_preempt_note_context_switch(int cpu)
|
||||
* grace period, then the fact that the task has been enqueued
|
||||
* means that we continue to block the current grace period.
|
||||
*/
|
||||
rcu_preempt_qs(cpu);
|
||||
local_irq_save(flags);
|
||||
t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
|
||||
rcu_preempt_qs(cpu);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
@@ -236,7 +280,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
|
||||
*/
|
||||
special = t->rcu_read_unlock_special;
|
||||
if (special & RCU_READ_UNLOCK_NEED_QS) {
|
||||
t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
|
||||
rcu_preempt_qs(smp_processor_id());
|
||||
}
|
||||
|
||||
@@ -473,7 +516,6 @@ static void rcu_preempt_check_callbacks(int cpu)
|
||||
struct task_struct *t = current;
|
||||
|
||||
if (t->rcu_read_lock_nesting == 0) {
|
||||
t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
|
||||
rcu_preempt_qs(cpu);
|
||||
return;
|
||||
}
|
||||
@@ -515,11 +557,13 @@ void synchronize_rcu(void)
|
||||
if (!rcu_scheduler_active)
|
||||
return;
|
||||
|
||||
init_rcu_head_on_stack(&rcu.head);
|
||||
init_completion(&rcu.completion);
|
||||
/* Will wake me after RCU finished. */
|
||||
call_rcu(&rcu.head, wakeme_after_rcu);
|
||||
/* Wait for it. */
|
||||
wait_for_completion(&rcu.completion);
|
||||
destroy_rcu_head_on_stack(&rcu.head);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(synchronize_rcu);
|
||||
|
||||
@@ -754,6 +798,7 @@ void exit_rcu(void)
|
||||
static void __init rcu_bootup_announce(void)
|
||||
{
|
||||
printk(KERN_INFO "Hierarchical RCU implementation.\n");
|
||||
rcu_bootup_announce_oddness();
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1008,6 +1053,8 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff);
|
||||
int rcu_needs_cpu(int cpu)
|
||||
{
|
||||
int c = 0;
|
||||
int snap;
|
||||
int snap_nmi;
|
||||
int thatcpu;
|
||||
|
||||
/* Check for being in the holdoff period. */
|
||||
@@ -1015,12 +1062,18 @@ int rcu_needs_cpu(int cpu)
|
||||
return rcu_needs_cpu_quick_check(cpu);
|
||||
|
||||
/* Don't bother unless we are the last non-dyntick-idle CPU. */
|
||||
for_each_cpu_not(thatcpu, nohz_cpu_mask)
|
||||
if (thatcpu != cpu) {
|
||||
for_each_online_cpu(thatcpu) {
|
||||
if (thatcpu == cpu)
|
||||
continue;
|
||||
snap = per_cpu(rcu_dynticks, thatcpu).dynticks;
|
||||
snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi;
|
||||
smp_mb(); /* Order sampling of snap with end of grace period. */
|
||||
if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) {
|
||||
per_cpu(rcu_dyntick_drain, cpu) = 0;
|
||||
per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1;
|
||||
return rcu_needs_cpu_quick_check(cpu);
|
||||
}
|
||||
}
|
||||
|
||||
/* Check and update the rcu_dyntick_drain sequencing. */
|
||||
if (per_cpu(rcu_dyntick_drain, cpu) <= 0) {
|
||||
|
||||
@@ -241,11 +241,13 @@ static const struct file_operations rcugp_fops = {
|
||||
static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
|
||||
{
|
||||
seq_printf(m, "%3d%cnp=%ld "
|
||||
"qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n",
|
||||
"qsp=%ld rpq=%ld cbr=%ld cng=%ld "
|
||||
"gpc=%ld gps=%ld nf=%ld nn=%ld\n",
|
||||
rdp->cpu,
|
||||
cpu_is_offline(rdp->cpu) ? '!' : ' ',
|
||||
rdp->n_rcu_pending,
|
||||
rdp->n_rp_qs_pending,
|
||||
rdp->n_rp_report_qs,
|
||||
rdp->n_rp_cb_ready,
|
||||
rdp->n_rp_cpu_needs_gp,
|
||||
rdp->n_rp_gp_completed,
|
||||
|
||||
+1
-1
@@ -3608,7 +3608,7 @@ need_resched:
|
||||
preempt_disable();
|
||||
cpu = smp_processor_id();
|
||||
rq = cpu_rq(cpu);
|
||||
rcu_sched_qs(cpu);
|
||||
rcu_note_context_switch(cpu);
|
||||
prev = rq->curr;
|
||||
switch_count = &prev->nivcsw;
|
||||
|
||||
|
||||
@@ -114,7 +114,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
char path[64];
|
||||
|
||||
rcu_read_lock();
|
||||
cgroup_path(task_group(p)->css.cgroup, path, sizeof(path));
|
||||
rcu_read_unlock();
|
||||
SEQ_printf(m, " %s", path);
|
||||
}
|
||||
#endif
|
||||
|
||||
+1
-1
@@ -716,7 +716,7 @@ static int run_ksoftirqd(void * __bind_cpu)
|
||||
preempt_enable_no_resched();
|
||||
cond_resched();
|
||||
preempt_disable();
|
||||
rcu_sched_qs((long)__bind_cpu);
|
||||
rcu_note_context_switch((long)__bind_cpu);
|
||||
}
|
||||
preempt_enable();
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
|
||||
@@ -294,7 +294,6 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
|
||||
struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
|
||||
unsigned int cpu = (unsigned long)hcpu;
|
||||
struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
|
||||
struct cpu_stop_work *work;
|
||||
struct task_struct *p;
|
||||
|
||||
switch (action & ~CPU_TASKS_FROZEN) {
|
||||
@@ -323,6 +322,9 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_DEAD:
|
||||
{
|
||||
struct cpu_stop_work *work;
|
||||
|
||||
/* kill the stopper */
|
||||
kthread_stop(stopper->thread);
|
||||
/* drain remaining works */
|
||||
@@ -335,6 +337,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
|
||||
put_task_struct(stopper->thread);
|
||||
stopper->thread = NULL;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -9,13 +9,9 @@
|
||||
#include <linux/kprobes.h>
|
||||
#include "trace.h"
|
||||
|
||||
DEFINE_PER_CPU(struct pt_regs, perf_trace_regs);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs);
|
||||
|
||||
EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
|
||||
|
||||
static char *perf_trace_buf;
|
||||
static char *perf_trace_buf_nmi;
|
||||
static char *perf_trace_buf[4];
|
||||
|
||||
/*
|
||||
* Force it to be aligned to unsigned long to avoid misaligned accesses
|
||||
@@ -27,63 +23,82 @@ typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
|
||||
/* Count the events in use (per event id, not per instance) */
|
||||
static int total_ref_count;
|
||||
|
||||
static int perf_trace_event_enable(struct ftrace_event_call *event)
|
||||
static int perf_trace_event_init(struct ftrace_event_call *tp_event,
|
||||
struct perf_event *p_event)
|
||||
{
|
||||
char *buf;
|
||||
struct hlist_head *list;
|
||||
int ret = -ENOMEM;
|
||||
int cpu;
|
||||
|
||||
if (event->perf_refcount++ > 0)
|
||||
p_event->tp_event = tp_event;
|
||||
if (tp_event->perf_refcount++ > 0)
|
||||
return 0;
|
||||
|
||||
list = alloc_percpu(struct hlist_head);
|
||||
if (!list)
|
||||
goto fail;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));
|
||||
|
||||
tp_event->perf_events = list;
|
||||
|
||||
if (!total_ref_count) {
|
||||
buf = (char *)alloc_percpu(perf_trace_t);
|
||||
if (!buf)
|
||||
goto fail_buf;
|
||||
char *buf;
|
||||
int i;
|
||||
|
||||
rcu_assign_pointer(perf_trace_buf, buf);
|
||||
for (i = 0; i < 4; i++) {
|
||||
buf = (char *)alloc_percpu(perf_trace_t);
|
||||
if (!buf)
|
||||
goto fail;
|
||||
|
||||
buf = (char *)alloc_percpu(perf_trace_t);
|
||||
if (!buf)
|
||||
goto fail_buf_nmi;
|
||||
|
||||
rcu_assign_pointer(perf_trace_buf_nmi, buf);
|
||||
perf_trace_buf[i] = buf;
|
||||
}
|
||||
}
|
||||
|
||||
if (event->class->reg)
|
||||
ret = event->class->reg(event, TRACE_REG_PERF_REGISTER);
|
||||
if (tp_event->class->reg)
|
||||
ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
|
||||
else
|
||||
ret = tracepoint_probe_register(event->name,
|
||||
event->class->perf_probe,
|
||||
event);
|
||||
if (!ret) {
|
||||
total_ref_count++;
|
||||
return 0;
|
||||
ret = tracepoint_probe_register(tp_event->name,
|
||||
tp_event->class->perf_probe,
|
||||
tp_event);
|
||||
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
total_ref_count++;
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
if (!total_ref_count) {
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
free_percpu(perf_trace_buf[i]);
|
||||
perf_trace_buf[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
fail_buf_nmi:
|
||||
if (!total_ref_count) {
|
||||
free_percpu(perf_trace_buf_nmi);
|
||||
free_percpu(perf_trace_buf);
|
||||
perf_trace_buf_nmi = NULL;
|
||||
perf_trace_buf = NULL;
|
||||
if (!--tp_event->perf_refcount) {
|
||||
free_percpu(tp_event->perf_events);
|
||||
tp_event->perf_events = NULL;
|
||||
}
|
||||
fail_buf:
|
||||
event->perf_refcount--;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int perf_trace_enable(int event_id)
|
||||
int perf_trace_init(struct perf_event *p_event)
|
||||
{
|
||||
struct ftrace_event_call *event;
|
||||
struct ftrace_event_call *tp_event;
|
||||
int event_id = p_event->attr.config;
|
||||
int ret = -EINVAL;
|
||||
|
||||
mutex_lock(&event_mutex);
|
||||
list_for_each_entry(event, &ftrace_events, list) {
|
||||
if (event->event.type == event_id &&
|
||||
event->class && event->class->perf_probe &&
|
||||
try_module_get(event->mod)) {
|
||||
ret = perf_trace_event_enable(event);
|
||||
list_for_each_entry(tp_event, &ftrace_events, list) {
|
||||
if (tp_event->event.type == event_id &&
|
||||
tp_event->class && tp_event->class->perf_probe &&
|
||||
try_module_get(tp_event->mod)) {
|
||||
ret = perf_trace_event_init(tp_event, p_event);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -92,93 +107,76 @@ int perf_trace_enable(int event_id)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void perf_trace_event_disable(struct ftrace_event_call *event)
|
||||
int perf_trace_enable(struct perf_event *p_event)
|
||||
{
|
||||
char *buf, *nmi_buf;
|
||||
struct ftrace_event_call *tp_event = p_event->tp_event;
|
||||
struct hlist_head *list;
|
||||
|
||||
if (--event->perf_refcount > 0)
|
||||
return;
|
||||
list = tp_event->perf_events;
|
||||
if (WARN_ON_ONCE(!list))
|
||||
return -EINVAL;
|
||||
|
||||
if (event->class->reg)
|
||||
event->class->reg(event, TRACE_REG_PERF_UNREGISTER);
|
||||
else
|
||||
tracepoint_probe_unregister(event->name, event->class->perf_probe, event);
|
||||
list = per_cpu_ptr(list, smp_processor_id());
|
||||
hlist_add_head_rcu(&p_event->hlist_entry, list);
|
||||
|
||||
if (!--total_ref_count) {
|
||||
buf = perf_trace_buf;
|
||||
rcu_assign_pointer(perf_trace_buf, NULL);
|
||||
|
||||
nmi_buf = perf_trace_buf_nmi;
|
||||
rcu_assign_pointer(perf_trace_buf_nmi, NULL);
|
||||
|
||||
/*
|
||||
* Ensure every events in profiling have finished before
|
||||
* releasing the buffers
|
||||
*/
|
||||
synchronize_sched();
|
||||
|
||||
free_percpu(buf);
|
||||
free_percpu(nmi_buf);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void perf_trace_disable(int event_id)
|
||||
void perf_trace_disable(struct perf_event *p_event)
|
||||
{
|
||||
struct ftrace_event_call *event;
|
||||
hlist_del_rcu(&p_event->hlist_entry);
|
||||
}
|
||||
|
||||
mutex_lock(&event_mutex);
|
||||
list_for_each_entry(event, &ftrace_events, list) {
|
||||
if (event->event.type == event_id) {
|
||||
perf_trace_event_disable(event);
|
||||
module_put(event->mod);
|
||||
break;
|
||||
void perf_trace_destroy(struct perf_event *p_event)
|
||||
{
|
||||
struct ftrace_event_call *tp_event = p_event->tp_event;
|
||||
int i;
|
||||
|
||||
if (--tp_event->perf_refcount > 0)
|
||||
return;
|
||||
|
||||
if (tp_event->class->reg)
|
||||
tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
|
||||
else
|
||||
tracepoint_probe_unregister(tp_event->name,
|
||||
tp_event->class->perf_probe,
|
||||
tp_event);
|
||||
|
||||
free_percpu(tp_event->perf_events);
|
||||
tp_event->perf_events = NULL;
|
||||
|
||||
if (!--total_ref_count) {
|
||||
for (i = 0; i < 4; i++) {
|
||||
free_percpu(perf_trace_buf[i]);
|
||||
perf_trace_buf[i] = NULL;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&event_mutex);
|
||||
}
|
||||
|
||||
__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
|
||||
int *rctxp, unsigned long *irq_flags)
|
||||
struct pt_regs *regs, int *rctxp)
|
||||
{
|
||||
struct trace_entry *entry;
|
||||
char *trace_buf, *raw_data;
|
||||
int pc, cpu;
|
||||
char *raw_data;
|
||||
int pc;
|
||||
|
||||
BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));
|
||||
|
||||
pc = preempt_count();
|
||||
|
||||
/* Protect the per cpu buffer, begin the rcu read side */
|
||||
local_irq_save(*irq_flags);
|
||||
|
||||
*rctxp = perf_swevent_get_recursion_context();
|
||||
if (*rctxp < 0)
|
||||
goto err_recursion;
|
||||
return NULL;
|
||||
|
||||
cpu = smp_processor_id();
|
||||
|
||||
if (in_nmi())
|
||||
trace_buf = rcu_dereference_sched(perf_trace_buf_nmi);
|
||||
else
|
||||
trace_buf = rcu_dereference_sched(perf_trace_buf);
|
||||
|
||||
if (!trace_buf)
|
||||
goto err;
|
||||
|
||||
raw_data = per_cpu_ptr(trace_buf, cpu);
|
||||
raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id());
|
||||
|
||||
/* zero the dead bytes from align to not leak stack to user */
|
||||
memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
|
||||
|
||||
entry = (struct trace_entry *)raw_data;
|
||||
tracing_generic_entry_update(entry, *irq_flags, pc);
|
||||
tracing_generic_entry_update(entry, regs->flags, pc);
|
||||
entry->type = type;
|
||||
|
||||
return raw_data;
|
||||
err:
|
||||
perf_swevent_put_recursion_context(*rctxp);
|
||||
err_recursion:
|
||||
local_irq_restore(*irq_flags);
|
||||
return NULL;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
|
||||
|
||||
@@ -1338,9 +1338,9 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
|
||||
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
|
||||
struct ftrace_event_call *call = &tp->call;
|
||||
struct kprobe_trace_entry_head *entry;
|
||||
struct hlist_head *head;
|
||||
u8 *data;
|
||||
int size, __size, i;
|
||||
unsigned long irq_flags;
|
||||
int rctx;
|
||||
|
||||
__size = sizeof(*entry) + tp->size;
|
||||
@@ -1350,8 +1350,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
|
||||
"profile buffer not large enough"))
|
||||
return;
|
||||
|
||||
entry = perf_trace_buf_prepare(size, call->event.type,
|
||||
&rctx, &irq_flags);
|
||||
entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
|
||||
if (!entry)
|
||||
return;
|
||||
|
||||
@@ -1360,7 +1359,8 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
|
||||
for (i = 0; i < tp->nr_args; i++)
|
||||
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
|
||||
|
||||
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, irq_flags, regs);
|
||||
head = per_cpu_ptr(call->perf_events, smp_processor_id());
|
||||
perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
|
||||
}
|
||||
|
||||
/* Kretprobe profile handler */
|
||||
@@ -1370,9 +1370,9 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
|
||||
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
|
||||
struct ftrace_event_call *call = &tp->call;
|
||||
struct kretprobe_trace_entry_head *entry;
|
||||
struct hlist_head *head;
|
||||
u8 *data;
|
||||
int size, __size, i;
|
||||
unsigned long irq_flags;
|
||||
int rctx;
|
||||
|
||||
__size = sizeof(*entry) + tp->size;
|
||||
@@ -1382,8 +1382,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
|
||||
"profile buffer not large enough"))
|
||||
return;
|
||||
|
||||
entry = perf_trace_buf_prepare(size, call->event.type,
|
||||
&rctx, &irq_flags);
|
||||
entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
|
||||
if (!entry)
|
||||
return;
|
||||
|
||||
@@ -1393,8 +1392,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
|
||||
for (i = 0; i < tp->nr_args; i++)
|
||||
call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
|
||||
|
||||
perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1,
|
||||
irq_flags, regs);
|
||||
head = per_cpu_ptr(call->perf_events, smp_processor_id());
|
||||
perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
|
||||
}
|
||||
|
||||
static int probe_perf_enable(struct ftrace_event_call *call)
|
||||
|
||||
@@ -488,7 +488,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
||||
{
|
||||
struct syscall_metadata *sys_data;
|
||||
struct syscall_trace_enter *rec;
|
||||
unsigned long flags;
|
||||
struct hlist_head *head;
|
||||
int syscall_nr;
|
||||
int rctx;
|
||||
int size;
|
||||
@@ -511,15 +511,16 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
|
||||
return;
|
||||
|
||||
rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
|
||||
sys_data->enter_event->event.type,
|
||||
&rctx, &flags);
|
||||
sys_data->enter_event->event.type, regs, &rctx);
|
||||
if (!rec)
|
||||
return;
|
||||
|
||||
rec->nr = syscall_nr;
|
||||
syscall_get_arguments(current, regs, 0, sys_data->nb_args,
|
||||
(unsigned long *)&rec->args);
|
||||
perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
|
||||
|
||||
head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id());
|
||||
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
|
||||
}
|
||||
|
||||
int perf_sysenter_enable(struct ftrace_event_call *call)
|
||||
@@ -561,7 +562,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
||||
{
|
||||
struct syscall_metadata *sys_data;
|
||||
struct syscall_trace_exit *rec;
|
||||
unsigned long flags;
|
||||
struct hlist_head *head;
|
||||
int syscall_nr;
|
||||
int rctx;
|
||||
int size;
|
||||
@@ -587,15 +588,15 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
|
||||
return;
|
||||
|
||||
rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
|
||||
sys_data->exit_event->event.type,
|
||||
&rctx, &flags);
|
||||
sys_data->exit_event->event.type, regs, &rctx);
|
||||
if (!rec)
|
||||
return;
|
||||
|
||||
rec->nr = syscall_nr;
|
||||
rec->ret = syscall_get_return_value(current, regs);
|
||||
|
||||
perf_trace_buf_submit(rec, size, rctx, 0, 1, flags, regs);
|
||||
head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id());
|
||||
perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
|
||||
}
|
||||
|
||||
int perf_sysexit_enable(struct ftrace_event_call *call)
|
||||
|
||||
Reference in New Issue
Block a user