From b24c1517d9cc137d7ad4d356cc68fd94680ab998 Mon Sep 17 00:00:00 2001 From: pengdonglin Date: Sun, 29 Dec 2024 11:26:20 +0800 Subject: [PATCH] ANDROID: Fix invalid caller output in trace_sched_blocked_reason I encountered an issue where Perfetto sometimes fails to display the blocked function when a thread enters the D state. The problem arises from the caller value in the trace_sched_blocked_reason, and I observed the following outputs while using bpftrace: bpftrace -e 't:sched:sched_blocked_reason /args.caller > 0 && args.caller < 0xFFFFFFFF/ {@[args.caller]=count();}' Attaching 1 probe... ^C @[0x26a8]: 1 @[0x188]: 1 @[0x3]: 1 @[0xf833]: 1 @[0x4]: 10 @[0x9]: 14 @[0x1]: 23 When try_to_wake_up is waking a thread executing __schedule, it can trigger the trace_sched_blocked_reason. This function attempts to obtain the thread's backtrace using __get_wchan. However, __get_wchan requires the thread to be blocking in cpu_switch_to, which is not always the case. | set_current_state(TASK_UNINTERRUPTIBLE) | __schedule | deactivate_task | p->on_rq = 0 | dequeue_task try_to_wake_up | pick_next_task ttwu_state_match(TASK_UNINTERRUPTIBLE) | context_switch p->on_rq == 0 | prepare_task_switch p->__state & TASK_UNINTERRUPTIBLE | switch_mm trace_sched_blocked_reason | __switch_to __get_wchan | ...... unwind_init_from_task | cpu_switch_to state->fp = thread_saved_fp(p) | mov x10, #THREAD_CPU_CONTEXT // Load an invalid fp | add x8, x0, x10 | ...... | stp fp, x9, [x8], #16 | str lr, [x8] | ...... | ret To resolve this, we can move the trace_sched_blocked_reason call inside __schedule to ensure accurate caller information. Fixes: 44447dec6eff ("ANDROID: sched: move blocked reason trace point to cover all class") Change-Id: Ib63839107dbe835a52f191cceef9452d13e18785 Signed-off-by: pengdonglin [jstultz: backported to android16-6.12] Signed-off-by: John Stultz --- include/trace/events/sched.h | 6 +++--- kernel/sched/core.c | 14 ++++++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 3f2e6d1d125c..1a8eb3dbc0cd 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -527,9 +527,9 @@ DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked, */ TRACE_EVENT(sched_blocked_reason, - TP_PROTO(struct task_struct *tsk), + TP_PROTO(struct task_struct *tsk, void *blocked_func), - TP_ARGS(tsk), + TP_ARGS(tsk, blocked_func), TP_STRUCT__entry( __field( pid_t, pid ) @@ -539,7 +539,7 @@ TRACE_EVENT(sched_blocked_reason, TP_fast_assign( __entry->pid = tsk->pid; - __entry->caller = (void *)__get_wchan(tsk); + __entry->caller = blocked_func; __entry->io_wait = tsk->in_iowait; ), diff --git a/kernel/sched/core.c b/kernel/sched/core.c index eed0f2b8f092..df63b3db2933 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4722,9 +4722,6 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) break; - if (READ_ONCE(p->__state) & TASK_UNINTERRUPTIBLE) - trace_sched_blocked_reason(p); - #ifdef CONFIG_SMP /* * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be @@ -7584,6 +7581,7 @@ static void __sched notrace __schedule(int sched_mode) * as a preemption by schedule_debug() and RCU. */ bool preempt = sched_mode > SM_NONE; + bool block = false; unsigned long *switch_count; unsigned long prev_state; struct rq_flags rf; @@ -7646,7 +7644,7 @@ static void __sched notrace __schedule(int sched_mode) goto picked; } } else if (!preempt && prev_state) { - try_to_block_task(rq, prev, prev_state, !task_is_blocked(prev)); + block = try_to_block_task(rq, prev, prev_state, !task_is_blocked(prev)); switch_count = &prev->nvcsw; } @@ -7719,6 +7717,14 @@ picked: trace_sched_switch(preempt, prev, next, prev_state); + if (block && (prev_state & TASK_UNINTERRUPTIBLE) + && trace_sched_blocked_reason_enabled()) { + unsigned long blocked_func = 0; + + stack_trace_save_tsk(prev, &blocked_func, 1, 0); + trace_sched_blocked_reason(prev, (void *)blocked_func); + } + /* Also unlocks the rq: */ rq = context_switch(rq, prev, next, &rf); } else {