ANDROID: Fix invalid caller output in trace_sched_blocked_reason

I encountered an issue where Perfetto sometimes fails to display the
blocked function when a thread enters the D state.

The problem arises from the caller value in the trace_sched_blocked_reason,
and I observed the following outputs while using bpftrace:

bpftrace -e 't:sched:sched_blocked_reason /args.caller > 0 &&
args.caller < 0xFFFFFFFF/ {@[args.caller]=count();}'
Attaching 1 probe...
^C

@[0x26a8]: 1
@[0x188]: 1
@[0x3]: 1
@[0xf833]: 1
@[0x4]: 10
@[0x9]: 14
@[0x1]: 23

When try_to_wake_up is waking a thread executing __schedule, it can trigger
the trace_sched_blocked_reason. This function attempts to obtain the
thread's backtrace using __get_wchan. However, __get_wchan requires the
thread to be blocking in cpu_switch_to, which is not always the case.

                                         | set_current_state(TASK_UNINTERRUPTIBLE)
                                         | __schedule
                                         |   deactivate_task
                                         |     p->on_rq = 0
                                         |     dequeue_task
try_to_wake_up                           |   pick_next_task
  ttwu_state_match(TASK_UNINTERRUPTIBLE) |   context_switch
  p->on_rq == 0                          |     prepare_task_switch
  p->__state & TASK_UNINTERRUPTIBLE      |     switch_mm
    trace_sched_blocked_reason           |     __switch_to
      __get_wchan                        |       ......
        unwind_init_from_task            |       cpu_switch_to
          state->fp = thread_saved_fp(p) |         mov x10, #THREAD_CPU_CONTEXT
            // Load an invalid fp        |         add x8, x0, x10
                                         |         ......
                                         |         stp fp, x9, [x8], #16
                                         |         str lr, [x8]
                                         |         ......
                                         |         ret

To resolve this, we can move the trace_sched_blocked_reason call inside
__schedule to ensure accurate caller information.

Fixes: 44447dec6eff ("ANDROID: sched: move blocked reason trace point to cover all class")
Change-Id: Ib63839107dbe835a52f191cceef9452d13e18785
Signed-off-by: pengdonglin <pengdonglin@xiaomi.com>
[jstultz: backported to android16-6.12]
Signed-off-by: John Stultz <jstultz@google.com>
This commit is contained in:
pengdonglin
2024-12-29 11:26:20 +08:00
committed by John Stultz
parent 5301f9b472
commit b24c1517d9
2 changed files with 13 additions and 7 deletions
+3 -3
View File
@@ -527,9 +527,9 @@ DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked,
*/
TRACE_EVENT(sched_blocked_reason,
TP_PROTO(struct task_struct *tsk),
TP_PROTO(struct task_struct *tsk, void *blocked_func),
TP_ARGS(tsk),
TP_ARGS(tsk, blocked_func),
TP_STRUCT__entry(
__field( pid_t, pid )
@@ -539,7 +539,7 @@ TRACE_EVENT(sched_blocked_reason,
TP_fast_assign(
__entry->pid = tsk->pid;
__entry->caller = (void *)__get_wchan(tsk);
__entry->caller = blocked_func;
__entry->io_wait = tsk->in_iowait;
),
+10 -4
View File
@@ -4722,9 +4722,6 @@ int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags))
break;
if (READ_ONCE(p->__state) & TASK_UNINTERRUPTIBLE)
trace_sched_blocked_reason(p);
#ifdef CONFIG_SMP
/*
* Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be
@@ -7584,6 +7581,7 @@ static void __sched notrace __schedule(int sched_mode)
* as a preemption by schedule_debug() and RCU.
*/
bool preempt = sched_mode > SM_NONE;
bool block = false;
unsigned long *switch_count;
unsigned long prev_state;
struct rq_flags rf;
@@ -7646,7 +7644,7 @@ static void __sched notrace __schedule(int sched_mode)
goto picked;
}
} else if (!preempt && prev_state) {
try_to_block_task(rq, prev, prev_state, !task_is_blocked(prev));
block = try_to_block_task(rq, prev, prev_state, !task_is_blocked(prev));
switch_count = &prev->nvcsw;
}
@@ -7719,6 +7717,14 @@ picked:
trace_sched_switch(preempt, prev, next, prev_state);
if (block && (prev_state & TASK_UNINTERRUPTIBLE)
&& trace_sched_blocked_reason_enabled()) {
unsigned long blocked_func = 0;
stack_trace_save_tsk(prev, &blocked_func, 1, 0);
trace_sched_blocked_reason(prev, (void *)blocked_func);
}
/* Also unlocks the rq: */
rq = context_switch(rq, prev, next, &rf);
} else {