diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 60b4f582fb8c..73f49724d05b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5103,6 +5103,34 @@ static void do_balance_callbacks(struct rq *rq, struct balance_callback *head) } } +/* + * Only called from __schedule context + * + * There are some cases where we are going to re-do the action + * that added the balance callbacks. We may not be in a state + * where we can run them, so just zap them so they can be + * properly re-added on the next time around. This is similar + * handling to running the callbacks, except we just don't call + * them. + */ +static void zap_balance_callbacks(struct rq *rq) +{ + struct balance_callback *next, *head; + bool found = false; + + lockdep_assert_rq_held(rq); + + head = rq->balance_callback; + while (head) { + if (head == &balance_push_callback) + found = true; + next = head->next; + head->next = NULL; + head = next; + } + rq->balance_callback = found ? &balance_push_callback : NULL; +} + static void balance_push(struct rq *rq); /* @@ -6654,7 +6682,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * Otherwise marks the task's __state as RUNNING */ static bool try_to_block_task(struct rq *rq, struct task_struct *p, - unsigned long task_state) + unsigned long task_state, bool deactivate_cond) { int flags = DEQUEUE_NOCLOCK; @@ -6663,6 +6691,9 @@ static bool try_to_block_task(struct rq *rq, struct task_struct *p, return false; } + if (!deactivate_cond) + return false; + p->sched_contributes_to_load = (task_state & TASK_UNINTERRUPTIBLE) && !(task_state & TASK_NOLOAD) && @@ -6686,6 +6717,88 @@ static bool try_to_block_task(struct rq *rq, struct task_struct *p, return true; } +#ifdef CONFIG_SCHED_PROXY_EXEC + +static inline struct task_struct * +proxy_resched_idle(struct rq *rq) +{ + put_prev_task(rq, rq->donor); + rq_set_donor(rq, rq->idle); + set_next_task(rq, rq->idle); + set_tsk_need_resched(rq->idle); + return rq->idle; +} + +static bool proxy_deactivate(struct rq *rq, struct task_struct *donor) +{ + unsigned long state = READ_ONCE(donor->__state); + + /* Don't deactivate if the state has been changed to TASK_RUNNING */ + if (state == TASK_RUNNING) + return false; + /* + * Because we got donor from pick_next_task, it is *crucial* + * that we call proxy_resched_idle before we deactivate it. + * As once we deactivate donor, donor->on_rq is set to zero, + * which allows ttwu to immediately try to wake the task on + * another rq. So we cannot use *any* references to donor + * after that point. So things like cfs_rq->curr or rq->donor + * need to be changed from next *before* we deactivate. + */ + proxy_resched_idle(rq); + return try_to_block_task(rq, donor, state, true); +} + +/* + * Initial simple proxy that just returns the task if it's waking + * or deactivates the blocked task so we can pick something that + * isn't blocked. + */ +static struct task_struct * +find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf) +{ + struct task_struct *p = donor; + struct mutex *mutex; + + mutex = p->blocked_on; + /* Something changed in the chain, so pick again */ + if (!mutex) + return NULL; + /* + * By taking mutex->wait_lock we hold off concurrent mutex_unlock() + * and ensure @owner sticks around. + */ + raw_spin_lock(&mutex->wait_lock); + raw_spin_lock(&p->blocked_lock); + + /* Check again that p is blocked with blocked_lock held */ + if (!task_is_blocked(p) || mutex != get_task_blocked_on(p)) { + /* + * Something changed in the blocked_on chain and + * we don't know if only at this level. So, let's + * just bail out completely and let __schedule + * figure things out (pick_again loop). + */ + goto out; + } + if (!proxy_deactivate(rq, donor)) + /* XXX: This hack won't work when we get to migrations */ + donor->blocked_on_state = BO_RUNNABLE; + +out: + raw_spin_unlock(&p->blocked_lock); + raw_spin_unlock(&mutex->wait_lock); + return NULL; +} +#else /* SCHED_PROXY_EXEC */ +static struct task_struct * +find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf) +{ + WARN_ONCE(1, "This should never be called in the !SCHED_PROXY_EXEC case\n"); + return donor; +} +#endif /* SCHED_PROXY_EXEC */ + /* * __schedule() is the main scheduler function. * @@ -6794,12 +6907,22 @@ static void __sched notrace __schedule(int sched_mode) goto picked; } } else if (!preempt && prev_state) { - block = try_to_block_task(rq, prev, prev_state); + block = try_to_block_task(rq, prev, prev_state, + !task_is_blocked(prev)); switch_count = &prev->nvcsw; } - next = pick_next_task(rq, prev, &rf); +pick_again: + next = pick_next_task(rq, rq->donor, &rf); rq_set_donor(rq, next); + if (unlikely(task_is_blocked(next))) { + next = find_proxy_task(rq, next, &rf); + if (!next) { + /* zap the balance_callbacks before picking again */ + zap_balance_callbacks(rq); + goto pick_again; + } + } picked: clear_tsk_need_resched(prev); clear_preempt_need_resched(); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 01df17e174c5..1d5910971379 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1511,9 +1511,22 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) enqueue_rt_entity(rt_se, flags); - if (!task_current(rq, p) && p->nr_cpus_allowed > 1 && - !should_honor_rt_sync(rq, p, sync)) - enqueue_pushable_task(rq, p); + /* + * Current can't be pushed away. Selected is tied to current, + * so don't push it either. + */ + if (task_current(rq, p) || task_current_donor(rq, p)) + return; + /* + * Pinned tasks can't be pushed. + */ + if (p->nr_cpus_allowed == 1) + return; + + if (should_honor_rt_sync(rq, p, sync)) + return; + + enqueue_pushable_task(rq, p); } static bool dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index a1ddd055fbc9..63546055f54a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2292,6 +2292,14 @@ static inline int task_current_donor(struct rq *rq, struct task_struct *p) return rq->donor == p; } +static inline bool task_is_blocked(struct task_struct *p) +{ + if (!sched_proxy_exec()) + return false; + + return !!p->blocked_on && p->blocked_on_state != BO_RUNNABLE; +} + static inline int task_on_cpu(struct rq *rq, struct task_struct *p) { #ifdef CONFIG_SMP @@ -2503,7 +2511,7 @@ static inline void put_prev_set_next_task(struct rq *rq, struct task_struct *prev, struct task_struct *next) { - WARN_ON_ONCE(rq->curr != prev); + WARN_ON_ONCE(rq->donor != prev); __put_prev_set_next_dl_server(rq, prev, next);