Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (57 commits)
  x86, perf events: Check if we have APIC enabled
  perf_event: Fix variable initialization in other codepaths
  perf kmem: Fix unused argument build warning
  perf symbols: perf_header__read_build_ids() offset'n'size should be u64
  perf symbols: dsos__read_build_ids() should read both user and kernel buildids
  perf tools: Align long options which have no short forms
  perf kmem: Show usage if no option is specified
  sched: Mark sched_clock() as notrace
  perf sched: Add max delay time snapshot
  perf tools: Correct size given to memset
  perf_event: Fix perf_swevent_hrtimer() variable initialization
  perf sched: Fix for getting task's execution time
  tracing/kprobes: Fix field creation's bad error handling
  perf_event: Cleanup for cpu_clock_perf_event_update()
  perf_event: Allocate children's perf_event_ctxp at the right time
  perf_event: Clean up __perf_event_init_context()
  hw-breakpoints: Modify breakpoints without unregistering them
  perf probe: Update perf-probe document
  perf probe: Support --del option
  trace-kprobe: Support delete probe syntax
  ...
This commit is contained in:
Linus Torvalds
2009-12-11 20:47:30 -08:00
40 changed files with 889 additions and 576 deletions
+100 -70
View File
@@ -52,7 +52,7 @@
static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
/* Number of pinned task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
/* Number of non-pinned cpu/task breakpoints in a cpu */
static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
@@ -73,7 +73,7 @@ static DEFINE_MUTEX(nr_bp_mutex);
static unsigned int max_task_bp_pinned(int cpu)
{
int i;
unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
for (i = HBP_NUM -1; i >= 0; i--) {
if (tsk_pinned[i] > 0)
@@ -83,50 +83,16 @@ static unsigned int max_task_bp_pinned(int cpu)
return 0;
}
/*
* Report the number of pinned/un-pinned breakpoints we have in
* a given cpu (cpu > -1) or in all of them (cpu = -1).
*/
static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
static int task_bp_pinned(struct task_struct *tsk)
{
if (cpu >= 0) {
slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
slots->pinned += max_task_bp_pinned(cpu);
slots->flexible = per_cpu(nr_bp_flexible, cpu);
return;
}
for_each_online_cpu(cpu) {
unsigned int nr;
nr = per_cpu(nr_cpu_bp_pinned, cpu);
nr += max_task_bp_pinned(cpu);
if (nr > slots->pinned)
slots->pinned = nr;
nr = per_cpu(nr_bp_flexible, cpu);
if (nr > slots->flexible)
slots->flexible = nr;
}
}
/*
* Add a pinned breakpoint for the given task in our constraint table
*/
static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
{
int count = 0;
struct perf_event *bp;
struct perf_event_context *ctx = tsk->perf_event_ctxp;
unsigned int *tsk_pinned;
struct list_head *list;
struct perf_event *bp;
unsigned long flags;
int count = 0;
if (WARN_ONCE(!ctx, "No perf context for this task"))
return;
return 0;
list = &ctx->event_list;
@@ -143,10 +109,60 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
spin_unlock_irqrestore(&ctx->lock, flags);
if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
return;
return count;
}
tsk_pinned = per_cpu(task_bp_pinned, cpu);
/*
* Report the number of pinned/un-pinned breakpoints we have in
* a given cpu (cpu > -1) or in all of them (cpu = -1).
*/
static void
fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
{
int cpu = bp->cpu;
struct task_struct *tsk = bp->ctx->task;
if (cpu >= 0) {
slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
if (!tsk)
slots->pinned += max_task_bp_pinned(cpu);
else
slots->pinned += task_bp_pinned(tsk);
slots->flexible = per_cpu(nr_bp_flexible, cpu);
return;
}
for_each_online_cpu(cpu) {
unsigned int nr;
nr = per_cpu(nr_cpu_bp_pinned, cpu);
if (!tsk)
nr += max_task_bp_pinned(cpu);
else
nr += task_bp_pinned(tsk);
if (nr > slots->pinned)
slots->pinned = nr;
nr = per_cpu(nr_bp_flexible, cpu);
if (nr > slots->flexible)
slots->flexible = nr;
}
}
/*
* Add a pinned breakpoint for the given task in our constraint table
*/
static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
{
unsigned int *tsk_pinned;
int count = 0;
count = task_bp_pinned(tsk);
tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
if (enable) {
tsk_pinned[count]++;
if (count > 0)
@@ -193,7 +209,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
* - If attached to a single cpu, check:
*
* (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
* + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
* + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM
*
* -> If there are already non-pinned counters in this cpu, it means
* there is already a free slot for them.
@@ -204,7 +220,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
* - If attached to every cpus, check:
*
* (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
* + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
* + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM
*
* -> This is roughly the same, except we check the number of per cpu
* bp for every cpu and we keep the max one. Same for the per tasks
@@ -216,7 +232,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
* - If attached to a single cpu, check:
*
* ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
* + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
* + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM
*
* -> Same checks as before. But now the nr_bp_flexible, if any, must keep
* one register at least (or they will never be fed).
@@ -224,7 +240,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
* - If attached to every cpus, check:
*
* ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
* + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
* + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM
*/
int reserve_bp_slot(struct perf_event *bp)
{
@@ -233,7 +249,7 @@ int reserve_bp_slot(struct perf_event *bp)
mutex_lock(&nr_bp_mutex);
fetch_bp_busy_slots(&slots, bp->cpu);
fetch_bp_busy_slots(&slots, bp);
/* Flexible counters need to keep at least one slot */
if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
@@ -259,7 +275,7 @@ void release_bp_slot(struct perf_event *bp)
}
int __register_perf_hw_breakpoint(struct perf_event *bp)
int register_perf_hw_breakpoint(struct perf_event *bp)
{
int ret;
@@ -276,19 +292,12 @@ int __register_perf_hw_breakpoint(struct perf_event *bp)
* This is a quick hack that will be removed soon, once we remove
* the tmp breakpoints from ptrace
*/
if (!bp->attr.disabled || bp->callback == perf_bp_event)
if (!bp->attr.disabled || !bp->overflow_handler)
ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
return ret;
}
int register_perf_hw_breakpoint(struct perf_event *bp)
{
bp->callback = perf_bp_event;
return __register_perf_hw_breakpoint(bp);
}
/**
* register_user_hw_breakpoint - register a hardware breakpoint for user space
* @attr: breakpoint attributes
@@ -297,7 +306,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
*/
struct perf_event *
register_user_hw_breakpoint(struct perf_event_attr *attr,
perf_callback_t triggered,
perf_overflow_handler_t triggered,
struct task_struct *tsk)
{
return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
@@ -311,19 +320,40 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
* @triggered: callback to trigger when we hit the breakpoint
* @tsk: pointer to 'task_struct' of the process to which the address belongs
*/
struct perf_event *
modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr,
perf_callback_t triggered,
struct task_struct *tsk)
int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr)
{
/*
* FIXME: do it without unregistering
* - We don't want to lose our slot
* - If the new bp is incorrect, don't lose the older one
*/
unregister_hw_breakpoint(bp);
u64 old_addr = bp->attr.bp_addr;
int old_type = bp->attr.bp_type;
int old_len = bp->attr.bp_len;
int err = 0;
return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered);
perf_event_disable(bp);
bp->attr.bp_addr = attr->bp_addr;
bp->attr.bp_type = attr->bp_type;
bp->attr.bp_len = attr->bp_len;
if (attr->disabled)
goto end;
err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
if (!err)
perf_event_enable(bp);
if (err) {
bp->attr.bp_addr = old_addr;
bp->attr.bp_type = old_type;
bp->attr.bp_len = old_len;
if (!bp->attr.disabled)
perf_event_enable(bp);
return err;
}
end:
bp->attr.disabled = attr->disabled;
return 0;
}
EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
@@ -348,7 +378,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
*/
struct perf_event **
register_wide_hw_breakpoint(struct perf_event_attr *attr,
perf_callback_t triggered)
perf_overflow_handler_t triggered)
{
struct perf_event **cpu_events, **pevent, *bp;
long err;
+38 -37
View File
@@ -36,7 +36,7 @@
/*
* Each CPU has a list of per CPU events:
*/
DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
int perf_max_events __read_mostly = 1;
static int perf_reserved_percpu __read_mostly;
@@ -567,7 +567,7 @@ static void __perf_event_disable(void *info)
* is the current context on this CPU and preemption is disabled,
* hence we can't get into perf_event_task_sched_out for this context.
*/
static void perf_event_disable(struct perf_event *event)
void perf_event_disable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task;
@@ -971,7 +971,7 @@ static void __perf_event_enable(void *info)
* perf_event_for_each_child or perf_event_for_each as described
* for perf_event_disable.
*/
static void perf_event_enable(struct perf_event *event)
void perf_event_enable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task;
@@ -1579,7 +1579,6 @@ static void
__perf_event_init_context(struct perf_event_context *ctx,
struct task_struct *task)
{
memset(ctx, 0, sizeof(*ctx));
spin_lock_init(&ctx->lock);
mutex_init(&ctx->mutex);
INIT_LIST_HEAD(&ctx->group_list);
@@ -1654,7 +1653,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
}
if (!ctx) {
ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
err = -ENOMEM;
if (!ctx)
goto errout;
@@ -4011,6 +4010,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer)
event->pmu->read(event);
data.addr = 0;
data.raw = NULL;
data.period = event->hw.last_period;
regs = get_irq_regs();
/*
@@ -4080,8 +4080,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
u64 now;
now = cpu_clock(cpu);
prev = atomic64_read(&event->hw.prev_count);
atomic64_set(&event->hw.prev_count, now);
prev = atomic64_xchg(&event->hw.prev_count, now);
atomic64_add(now - prev, &event->count);
}
@@ -4286,15 +4285,8 @@ static void bp_perf_event_destroy(struct perf_event *event)
static const struct pmu *bp_perf_event_init(struct perf_event *bp)
{
int err;
/*
* The breakpoint is already filled if we haven't created the counter
* through perf syscall
* FIXME: manage to get trigerred to NULL if it comes from syscalls
*/
if (!bp->callback)
err = register_perf_hw_breakpoint(bp);
else
err = __register_perf_hw_breakpoint(bp);
err = register_perf_hw_breakpoint(bp);
if (err)
return ERR_PTR(err);
@@ -4308,6 +4300,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
struct perf_sample_data sample;
struct pt_regs *regs = data;
sample.raw = NULL;
sample.addr = bp->attr.bp_addr;
if (!perf_exclude_event(bp, regs))
@@ -4390,7 +4383,7 @@ perf_event_alloc(struct perf_event_attr *attr,
struct perf_event_context *ctx,
struct perf_event *group_leader,
struct perf_event *parent_event,
perf_callback_t callback,
perf_overflow_handler_t overflow_handler,
gfp_t gfpflags)
{
const struct pmu *pmu;
@@ -4433,10 +4426,10 @@ perf_event_alloc(struct perf_event_attr *attr,
event->state = PERF_EVENT_STATE_INACTIVE;
if (!callback && parent_event)
callback = parent_event->callback;
if (!overflow_handler && parent_event)
overflow_handler = parent_event->overflow_handler;
event->callback = callback;
event->overflow_handler = overflow_handler;
if (attr->disabled)
event->state = PERF_EVENT_STATE_OFF;
@@ -4776,7 +4769,8 @@ err_put_context:
*/
struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
pid_t pid, perf_callback_t callback)
pid_t pid,
perf_overflow_handler_t overflow_handler)
{
struct perf_event *event;
struct perf_event_context *ctx;
@@ -4793,7 +4787,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
}
event = perf_event_alloc(attr, cpu, ctx, NULL,
NULL, callback, GFP_KERNEL);
NULL, overflow_handler, GFP_KERNEL);
if (IS_ERR(event)) {
err = PTR_ERR(event);
goto err_put_context;
@@ -5090,7 +5084,7 @@ again:
*/
int perf_event_init_task(struct task_struct *child)
{
struct perf_event_context *child_ctx, *parent_ctx;
struct perf_event_context *child_ctx = NULL, *parent_ctx;
struct perf_event_context *cloned_ctx;
struct perf_event *event;
struct task_struct *parent = current;
@@ -5105,20 +5099,6 @@ int perf_event_init_task(struct task_struct *child)
if (likely(!parent->perf_event_ctxp))
return 0;
/*
* This is executed from the parent task context, so inherit
* events that have been marked for cloning.
* First allocate and initialize a context for the child.
*/
child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL);
if (!child_ctx)
return -ENOMEM;
__perf_event_init_context(child_ctx, child);
child->perf_event_ctxp = child_ctx;
get_task_struct(child);
/*
* If the parent's context is a clone, pin it so it won't get
* swapped under us.
@@ -5149,6 +5129,26 @@ int perf_event_init_task(struct task_struct *child)
continue;
}
if (!child->perf_event_ctxp) {
/*
* This is executed from the parent task context, so
* inherit events that have been marked for cloning.
* First allocate and initialize a context for the
* child.
*/
child_ctx = kzalloc(sizeof(struct perf_event_context),
GFP_KERNEL);
if (!child_ctx) {
ret = -ENOMEM;
goto exit;
}
__perf_event_init_context(child_ctx, child);
child->perf_event_ctxp = child_ctx;
get_task_struct(child);
}
ret = inherit_group(event, parent, parent_ctx,
child, child_ctx);
if (ret) {
@@ -5177,6 +5177,7 @@ int perf_event_init_task(struct task_struct *child)
get_ctx(child_ctx->parent_ctx);
}
exit:
mutex_unlock(&parent_ctx->mutex);
perf_unpin_context(parent_ctx);
+30 -11
View File
@@ -606,23 +606,22 @@ static int create_trace_probe(int argc, char **argv)
*/
struct trace_probe *tp;
int i, ret = 0;
int is_return = 0;
int is_return = 0, is_delete = 0;
char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL;
unsigned long offset = 0;
void *addr = NULL;
char buf[MAX_EVENT_NAME_LEN];
if (argc < 2) {
pr_info("Probe point is not specified.\n");
return -EINVAL;
}
/* argc must be >= 1 */
if (argv[0][0] == 'p')
is_return = 0;
else if (argv[0][0] == 'r')
is_return = 1;
else if (argv[0][0] == '-')
is_delete = 1;
else {
pr_info("Probe definition must be started with 'p' or 'r'.\n");
pr_info("Probe definition must be started with 'p', 'r' or"
" '-'.\n");
return -EINVAL;
}
@@ -642,7 +641,29 @@ static int create_trace_probe(int argc, char **argv)
return -EINVAL;
}
}
if (!group)
group = KPROBE_EVENT_SYSTEM;
if (is_delete) {
if (!event) {
pr_info("Delete command needs an event name.\n");
return -EINVAL;
}
tp = find_probe_event(event, group);
if (!tp) {
pr_info("Event %s/%s doesn't exist.\n", group, event);
return -ENOENT;
}
/* delete an event */
unregister_trace_probe(tp);
free_trace_probe(tp);
return 0;
}
if (argc < 2) {
pr_info("Probe point is not specified.\n");
return -EINVAL;
}
if (isdigit(argv[1][0])) {
if (is_return) {
pr_info("Return probe point must be a symbol.\n");
@@ -671,8 +692,6 @@ static int create_trace_probe(int argc, char **argv)
argc -= 2; argv += 2;
/* setup a probe */
if (!group)
group = KPROBE_EVENT_SYSTEM;
if (!event) {
/* Make a new event name */
if (symbol)
@@ -1114,7 +1133,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
struct trace_probe *tp = (struct trace_probe *)event_call->data;
ret = trace_define_common_fields(event_call);
if (!ret)
if (ret)
return ret;
DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
@@ -1132,7 +1151,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
struct trace_probe *tp = (struct trace_probe *)event_call->data;
ret = trace_define_common_fields(event_call);
if (!ret)
if (ret)
return ret;
DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
+3 -2
View File
@@ -79,11 +79,12 @@ void ksym_collect_stats(unsigned long hbp_hit_addr)
}
#endif /* CONFIG_PROFILE_KSYM_TRACER */
void ksym_hbp_handler(struct perf_event *hbp, void *data)
void ksym_hbp_handler(struct perf_event *hbp, int nmi,
struct perf_sample_data *data,
struct pt_regs *regs)
{
struct ring_buffer_event *event;
struct ksym_trace_entry *entry;
struct pt_regs *regs = data;
struct ring_buffer *buffer;
int pc;