ANDROID: Fix invalid caller output in trace_sched_blocked_reason
I encountered an issue where Perfetto sometimes fails to display the
blocked function when a thread enters the D state.
The problem arises from the caller value in the trace_sched_blocked_reason,
and I observed the following outputs while using bpftrace:
bpftrace -e 't:sched:sched_blocked_reason /args.caller > 0 &&
args.caller < 0xFFFFFFFF/ {@[args.caller]=count();}'
Attaching 1 probe...
^C
@[0x26a8]: 1
@[0x188]: 1
@[0x3]: 1
@[0xf833]: 1
@[0x4]: 10
@[0x9]: 14
@[0x1]: 23
When try_to_wake_up is waking a thread executing __schedule, it can trigger
the trace_sched_blocked_reason. This function attempts to obtain the
thread's backtrace using __get_wchan. However, __get_wchan requires the
thread to be blocking in cpu_switch_to, which is not always the case.
| set_current_state(TASK_UNINTERRUPTIBLE)
| __schedule
| deactivate_task
| p->on_rq = 0
| dequeue_task
try_to_wake_up | pick_next_task
ttwu_state_match(TASK_UNINTERRUPTIBLE) | context_switch
p->on_rq == 0 | prepare_task_switch
p->__state & TASK_UNINTERRUPTIBLE | switch_mm
trace_sched_blocked_reason | __switch_to
__get_wchan | ......
unwind_init_from_task | cpu_switch_to
state->fp = thread_saved_fp(p) | mov x10, #THREAD_CPU_CONTEXT
// Load an invalid fp | add x8, x0, x10
| ......
| stp fp, x9, [x8], #16
| str lr, [x8]
| ......
| ret
To resolve this, we can move the trace_sched_blocked_reason call inside
__schedule to ensure accurate caller information.
Fixes: 44447dec6eff ("ANDROID: sched: move blocked reason trace point to cover all class")
Change-Id: Ib63839107dbe835a52f191cceef9452d13e18785
Signed-off-by: pengdonglin <pengdonglin@xiaomi.com>
[jstultz: backported to android16-6.12]
Signed-off-by: John Stultz <jstultz@google.com>
This commit is contained in:
@@ -527,9 +527,9 @@ DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked,
|
||||
*/
|
||||
TRACE_EVENT(sched_blocked_reason,
|
||||
|
||||
TP_PROTO(struct task_struct *tsk),
|
||||
TP_PROTO(struct task_struct *tsk, void *blocked_func),
|
||||
|
||||
TP_ARGS(tsk),
|
||||
TP_ARGS(tsk, blocked_func),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( pid_t, pid )
|
||||
@@ -539,7 +539,7 @@ TRACE_EVENT(sched_blocked_reason,
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->pid = tsk->pid;
|
||||
__entry->caller = (void *)__get_wchan(tsk);
|
||||
__entry->caller = blocked_func;
|
||||
__entry->io_wait = tsk->in_iowait;
|
||||
),
|
||||
|
||||
|
||||
+10
-4
@@ -4722,9 +4722,6 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
|
||||
if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
|
||||
break;
|
||||
|
||||
if (READ_ONCE(p->__state) & TASK_UNINTERRUPTIBLE)
|
||||
trace_sched_blocked_reason(p);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
|
||||
@@ -7584,6 +7581,7 @@ static void __sched notrace __schedule(int sched_mode)
|
||||
* as a preemption by schedule_debug() and RCU.
|
||||
*/
|
||||
bool preempt = sched_mode > SM_NONE;
|
||||
bool block = false;
|
||||
unsigned long *switch_count;
|
||||
unsigned long prev_state;
|
||||
struct rq_flags rf;
|
||||
@@ -7646,7 +7644,7 @@ static void __sched notrace __schedule(int sched_mode)
|
||||
goto picked;
|
||||
}
|
||||
} else if (!preempt && prev_state) {
|
||||
try_to_block_task(rq, prev, prev_state, !task_is_blocked(prev));
|
||||
block = try_to_block_task(rq, prev, prev_state, !task_is_blocked(prev));
|
||||
switch_count = &prev->nvcsw;
|
||||
}
|
||||
|
||||
@@ -7719,6 +7717,14 @@ picked:
|
||||
|
||||
trace_sched_switch(preempt, prev, next, prev_state);
|
||||
|
||||
if (block && (prev_state & TASK_UNINTERRUPTIBLE)
|
||||
&& trace_sched_blocked_reason_enabled()) {
|
||||
unsigned long blocked_func = 0;
|
||||
|
||||
stack_trace_save_tsk(prev, &blocked_func, 1, 0);
|
||||
trace_sched_blocked_reason(prev, (void *)blocked_func);
|
||||
}
|
||||
|
||||
/* Also unlocks the rq: */
|
||||
rq = context_switch(rq, prev, next, &rf);
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user