perf: Move swevent_htable::recursion into task_struct.

BugLink: https://bugs.launchpad.net/bugs/2089700

The swevent_htable::recursion counter prevents recursion by blocking the
creation of a new swevent while an event is being processed. The counter
is per-CPU and preemption must be disabled to have a stable counter.
perf_pending_task() disables preemption to access the counter and then
sends a signal. This is problematic on PREEMPT_RT because sending a
signal uses a spinlock_t, which must not be acquired in atomic context
on PREEMPT_RT because it becomes a sleeping lock.

The atomic context can be avoided by moving the counter into the
task_struct. There is a 4-byte hole between futex_state (usually
enabled) and the following perf pointer (perf_event_ctxp). Now that the
recursion counter has been shrunk to four u8 entries, it fits perfectly.

Move swevent_htable::recursion into task_struct.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20240704170424.1466941-6-bigeasy@linutronix.de
(cherry picked from 0d40a6d83e3e6751f1107ba33587262d937c969f)
Signed-off-by: Kevin Becker <kevin.becker@canonical.com>
Acked-by: Magali Lemes <magali.lemes@canonical.com>
Acked-by: John Cabaj <john.cabaj@canonical.com>
This commit is contained in:
Sebastian Andrzej Siewior
2024-07-04 19:03:39 +02:00
committed by Kevin Becker
parent 15e0bd29c0
commit c51fa128d4
4 changed files with 11 additions and 17 deletions
-6
View File
@@ -967,12 +967,6 @@ struct perf_event_context {
local_t nr_pending;
};
/*
* Number of contexts where an event can trigger:
* task, softirq, hardirq, nmi.
*/
#define PERF_NR_CONTEXTS 4
struct perf_cpu_pmu_context {
struct perf_event_pmu_context epc;
struct perf_event_pmu_context *task_epc;
+7
View File
@@ -736,6 +736,12 @@ enum perf_event_task_context {
perf_nr_task_contexts,
};
/*
* Number of contexts where an event can trigger:
* task, softirq, hardirq, nmi.
*/
#define PERF_NR_CONTEXTS 4
struct wake_q_node {
struct wake_q_node *next;
};
@@ -1253,6 +1259,7 @@ struct task_struct {
unsigned int futex_state;
#endif
#ifdef CONFIG_PERF_EVENTS
u8 perf_recursion[PERF_NR_CONTEXTS];
struct perf_event_context *perf_event_ctxp;
struct mutex perf_event_mutex;
struct list_head perf_event_list;
+3 -10
View File
@@ -9689,11 +9689,7 @@ struct swevent_htable {
struct swevent_hlist *swevent_hlist;
struct mutex hlist_mutex;
int hlist_refcount;
/* Recursion avoidance in each contexts */
u8 recursion[PERF_NR_CONTEXTS];
};
static DEFINE_PER_CPU(struct swevent_htable, swevent_htable);
/*
@@ -9891,17 +9887,13 @@ DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);
int perf_swevent_get_recursion_context(void)
{
struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
return get_recursion_context(swhash->recursion);
return get_recursion_context(current->perf_recursion);
}
EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
void perf_swevent_put_recursion_context(int rctx)
{
struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
put_recursion_context(swhash->recursion, rctx);
put_recursion_context(current->perf_recursion, rctx);
}
void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
@@ -13674,6 +13666,7 @@ int perf_event_init_task(struct task_struct *child, u64 clone_flags)
{
int ret;
memset(child->perf_recursion, 0, sizeof(child->perf_recursion));
child->perf_event_ctxp = NULL;
mutex_init(&child->perf_event_mutex);
INIT_LIST_HEAD(&child->perf_event_list);
+1 -1
View File
@@ -222,7 +222,7 @@ static inline int get_recursion_context(u8 *recursion)
return rctx;
}
static inline void put_recursion_context(u8 *recursion, int rctx)
static inline void put_recursion_context(u8 *recursion, unsigned char rctx)
{
barrier();
recursion[rctx]--;