Merge tag 'sched-core-2022-12-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:

 - Implement persistent user-requested affinity: introduce
   affinity_context::user_mask and unconditionally preserve the
   user-requested CPU affinity masks, for long-lived tasks to better
   interact with cpusets & CPU hotplug events over longer timespans,
   without destroying the original affinity intent if the underlying
   topology changes.

 - Uclamp updates: fix relationship between uclamp and fits_capacity()

 - PSI fixes

 - Misc fixes & updates

* tag 'sched-core-2022-12-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched: Clear ttwu_pending after enqueue_task()
  sched/psi: Use task->psi_flags to clear in CPU migration
  sched/psi: Stop relying on timer_pending() for poll_work rescheduling
  sched/psi: Fix avgs_work re-arm in psi_avgs_work()
  sched/psi: Fix possible missing or delayed pending event
  sched: Always clear user_cpus_ptr in do_set_cpus_allowed()
  sched: Enforce user requested affinity
  sched: Always preserve the user requested cpumask
  sched: Introduce affinity_context
  sched: Add __releases annotations to affine_move_task()
  sched/fair: Check if prev_cpu has highest spare cap in feec()
  sched/fair: Consider capacity inversion in util_fits_cpu()
  sched/fair: Detect capacity inversion
  sched/uclamp: Cater for uclamp in find_energy_efficient_cpu()'s early exit condition
  sched/uclamp: Make cpu_overutilized() use util_fits_cpu()
  sched/uclamp: Make asym_fits_capacity() use util_fits_cpu()
  sched/uclamp: Make select_idle_capacity() use util_fits_cpu()
  sched/uclamp: Fix fits_capacity() check in feec()
  sched/uclamp: Make task_fits_capacity() use util_fits_cpu()
  sched/uclamp: Fix relationship between uclamp and migration margin
This commit is contained in:
@@ -72,6 +72,9 @@ enum psi_states {
|
||||
/* Use one bit in the state mask to track TSK_ONCPU */
|
||||
#define PSI_ONCPU (1 << NR_PSI_STATES)
|
||||
|
||||
/* Flag whether to re-arm avgs_work, see details in get_recent_times() */
|
||||
#define PSI_STATE_RESCHEDULE (1 << (NR_PSI_STATES + 1))
|
||||
|
||||
enum psi_aggregators {
|
||||
PSI_AVGS = 0,
|
||||
PSI_POLL,
|
||||
@@ -177,6 +180,7 @@ struct psi_group {
|
||||
struct timer_list poll_timer;
|
||||
wait_queue_head_t poll_wait;
|
||||
atomic_t poll_wakeup;
|
||||
atomic_t poll_scheduled;
|
||||
|
||||
/* Protects data used by the monitor */
|
||||
struct mutex trigger_lock;
|
||||
|
||||
@@ -888,9 +888,6 @@ struct task_struct {
|
||||
unsigned sched_reset_on_fork:1;
|
||||
unsigned sched_contributes_to_load:1;
|
||||
unsigned sched_migrated:1;
|
||||
#ifdef CONFIG_PSI
|
||||
unsigned sched_psi_wake_requeue:1;
|
||||
#endif
|
||||
|
||||
/* Force alignment to the next boundary: */
|
||||
unsigned :0;
|
||||
|
||||
+163
-96
@@ -1392,7 +1392,7 @@ static inline void uclamp_idle_reset(struct rq *rq, enum uclamp_id clamp_id,
|
||||
if (!(rq->uclamp_flags & UCLAMP_FLAG_IDLE))
|
||||
return;
|
||||
|
||||
WRITE_ONCE(rq->uclamp[clamp_id].value, clamp_value);
|
||||
uclamp_rq_set(rq, clamp_id, clamp_value);
|
||||
}
|
||||
|
||||
static inline
|
||||
@@ -1543,8 +1543,8 @@ static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p,
|
||||
if (bucket->tasks == 1 || uc_se->value > bucket->value)
|
||||
bucket->value = uc_se->value;
|
||||
|
||||
if (uc_se->value > READ_ONCE(uc_rq->value))
|
||||
WRITE_ONCE(uc_rq->value, uc_se->value);
|
||||
if (uc_se->value > uclamp_rq_get(rq, clamp_id))
|
||||
uclamp_rq_set(rq, clamp_id, uc_se->value);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1610,7 +1610,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
|
||||
if (likely(bucket->tasks))
|
||||
return;
|
||||
|
||||
rq_clamp = READ_ONCE(uc_rq->value);
|
||||
rq_clamp = uclamp_rq_get(rq, clamp_id);
|
||||
/*
|
||||
* Defensive programming: this should never happen. If it happens,
|
||||
* e.g. due to future modification, warn and fixup the expected value.
|
||||
@@ -1618,7 +1618,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
|
||||
SCHED_WARN_ON(bucket->value > rq_clamp);
|
||||
if (bucket->value >= rq_clamp) {
|
||||
bkt_clamp = uclamp_rq_max_value(rq, clamp_id, uc_se->value);
|
||||
WRITE_ONCE(uc_rq->value, bkt_clamp);
|
||||
uclamp_rq_set(rq, clamp_id, bkt_clamp);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2053,7 +2053,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
|
||||
|
||||
if (!(flags & ENQUEUE_RESTORE)) {
|
||||
sched_info_enqueue(rq, p);
|
||||
psi_enqueue(p, flags & ENQUEUE_WAKEUP);
|
||||
psi_enqueue(p, (flags & ENQUEUE_WAKEUP) && !(flags & ENQUEUE_MIGRATED));
|
||||
}
|
||||
|
||||
uclamp_rq_inc(rq, p);
|
||||
@@ -2189,14 +2189,18 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
|
||||
#ifdef CONFIG_SMP
|
||||
|
||||
static void
|
||||
__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags);
|
||||
__do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx);
|
||||
|
||||
static int __set_cpus_allowed_ptr(struct task_struct *p,
|
||||
const struct cpumask *new_mask,
|
||||
u32 flags);
|
||||
struct affinity_context *ctx);
|
||||
|
||||
static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
|
||||
{
|
||||
struct affinity_context ac = {
|
||||
.new_mask = cpumask_of(rq->cpu),
|
||||
.flags = SCA_MIGRATE_DISABLE,
|
||||
};
|
||||
|
||||
if (likely(!p->migration_disabled))
|
||||
return;
|
||||
|
||||
@@ -2206,7 +2210,7 @@ static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
|
||||
/*
|
||||
* Violates locking rules! see comment in __do_set_cpus_allowed().
|
||||
*/
|
||||
__do_set_cpus_allowed(p, cpumask_of(rq->cpu), SCA_MIGRATE_DISABLE);
|
||||
__do_set_cpus_allowed(p, &ac);
|
||||
}
|
||||
|
||||
void migrate_disable(void)
|
||||
@@ -2228,6 +2232,10 @@ EXPORT_SYMBOL_GPL(migrate_disable);
|
||||
void migrate_enable(void)
|
||||
{
|
||||
struct task_struct *p = current;
|
||||
struct affinity_context ac = {
|
||||
.new_mask = &p->cpus_mask,
|
||||
.flags = SCA_MIGRATE_ENABLE,
|
||||
};
|
||||
|
||||
if (p->migration_disabled > 1) {
|
||||
p->migration_disabled--;
|
||||
@@ -2243,7 +2251,7 @@ void migrate_enable(void)
|
||||
*/
|
||||
preempt_disable();
|
||||
if (p->cpus_ptr != &p->cpus_mask)
|
||||
__set_cpus_allowed_ptr(p, &p->cpus_mask, SCA_MIGRATE_ENABLE);
|
||||
__set_cpus_allowed_ptr(p, &ac);
|
||||
/*
|
||||
* Mustn't clear migration_disabled() until cpus_ptr points back at the
|
||||
* regular cpus_mask, otherwise things that race (eg.
|
||||
@@ -2523,19 +2531,25 @@ out_unlock:
|
||||
* sched_class::set_cpus_allowed must do the below, but is not required to
|
||||
* actually call this function.
|
||||
*/
|
||||
void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
|
||||
void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx)
|
||||
{
|
||||
if (flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) {
|
||||
p->cpus_ptr = new_mask;
|
||||
if (ctx->flags & (SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) {
|
||||
p->cpus_ptr = ctx->new_mask;
|
||||
return;
|
||||
}
|
||||
|
||||
cpumask_copy(&p->cpus_mask, new_mask);
|
||||
p->nr_cpus_allowed = cpumask_weight(new_mask);
|
||||
cpumask_copy(&p->cpus_mask, ctx->new_mask);
|
||||
p->nr_cpus_allowed = cpumask_weight(ctx->new_mask);
|
||||
|
||||
/*
|
||||
* Swap in a new user_cpus_ptr if SCA_USER flag set
|
||||
*/
|
||||
if (ctx->flags & SCA_USER)
|
||||
swap(p->user_cpus_ptr, ctx->user_mask);
|
||||
}
|
||||
|
||||
static void
|
||||
__do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 flags)
|
||||
__do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx)
|
||||
{
|
||||
struct rq *rq = task_rq(p);
|
||||
bool queued, running;
|
||||
@@ -2552,7 +2566,7 @@ __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32
|
||||
*
|
||||
* XXX do further audits, this smells like something putrid.
|
||||
*/
|
||||
if (flags & SCA_MIGRATE_DISABLE)
|
||||
if (ctx->flags & SCA_MIGRATE_DISABLE)
|
||||
SCHED_WARN_ON(!p->on_cpu);
|
||||
else
|
||||
lockdep_assert_held(&p->pi_lock);
|
||||
@@ -2571,7 +2585,7 @@ __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32
|
||||
if (running)
|
||||
put_prev_task(rq, p);
|
||||
|
||||
p->sched_class->set_cpus_allowed(p, new_mask, flags);
|
||||
p->sched_class->set_cpus_allowed(p, ctx);
|
||||
|
||||
if (queued)
|
||||
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
|
||||
@@ -2579,14 +2593,27 @@ __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32
|
||||
set_next_task(rq, p);
|
||||
}
|
||||
|
||||
/*
|
||||
* Used for kthread_bind() and select_fallback_rq(), in both cases the user
|
||||
* affinity (if any) should be destroyed too.
|
||||
*/
|
||||
void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
|
||||
{
|
||||
__do_set_cpus_allowed(p, new_mask, 0);
|
||||
struct affinity_context ac = {
|
||||
.new_mask = new_mask,
|
||||
.user_mask = NULL,
|
||||
.flags = SCA_USER, /* clear the user requested mask */
|
||||
};
|
||||
|
||||
__do_set_cpus_allowed(p, &ac);
|
||||
kfree(ac.user_mask);
|
||||
}
|
||||
|
||||
int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src,
|
||||
int node)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (!src->user_cpus_ptr)
|
||||
return 0;
|
||||
|
||||
@@ -2594,7 +2621,10 @@ int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src,
|
||||
if (!dst->user_cpus_ptr)
|
||||
return -ENOMEM;
|
||||
|
||||
/* Use pi_lock to protect content of user_cpus_ptr */
|
||||
raw_spin_lock_irqsave(&src->pi_lock, flags);
|
||||
cpumask_copy(dst->user_cpus_ptr, src->user_cpus_ptr);
|
||||
raw_spin_unlock_irqrestore(&src->pi_lock, flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2690,6 +2720,8 @@ void release_user_cpus_ptr(struct task_struct *p)
|
||||
*/
|
||||
static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flags *rf,
|
||||
int dest_cpu, unsigned int flags)
|
||||
__releases(rq->lock)
|
||||
__releases(p->pi_lock)
|
||||
{
|
||||
struct set_affinity_pending my_pending = { }, *pending = NULL;
|
||||
bool stop_pending, complete = false;
|
||||
@@ -2832,8 +2864,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
|
||||
* Called with both p->pi_lock and rq->lock held; drops both before returning.
|
||||
*/
|
||||
static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
|
||||
const struct cpumask *new_mask,
|
||||
u32 flags,
|
||||
struct affinity_context *ctx,
|
||||
struct rq *rq,
|
||||
struct rq_flags *rf)
|
||||
__releases(rq->lock)
|
||||
@@ -2842,7 +2873,6 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
|
||||
const struct cpumask *cpu_allowed_mask = task_cpu_possible_mask(p);
|
||||
const struct cpumask *cpu_valid_mask = cpu_active_mask;
|
||||
bool kthread = p->flags & PF_KTHREAD;
|
||||
struct cpumask *user_mask = NULL;
|
||||
unsigned int dest_cpu;
|
||||
int ret = 0;
|
||||
|
||||
@@ -2862,7 +2892,7 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
|
||||
cpu_valid_mask = cpu_online_mask;
|
||||
}
|
||||
|
||||
if (!kthread && !cpumask_subset(new_mask, cpu_allowed_mask)) {
|
||||
if (!kthread && !cpumask_subset(ctx->new_mask, cpu_allowed_mask)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
@@ -2871,18 +2901,18 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
|
||||
* Must re-check here, to close a race against __kthread_bind(),
|
||||
* sched_setaffinity() is not guaranteed to observe the flag.
|
||||
*/
|
||||
if ((flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) {
|
||||
if ((ctx->flags & SCA_CHECK) && (p->flags & PF_NO_SETAFFINITY)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!(flags & SCA_MIGRATE_ENABLE)) {
|
||||
if (cpumask_equal(&p->cpus_mask, new_mask))
|
||||
if (!(ctx->flags & SCA_MIGRATE_ENABLE)) {
|
||||
if (cpumask_equal(&p->cpus_mask, ctx->new_mask))
|
||||
goto out;
|
||||
|
||||
if (WARN_ON_ONCE(p == current &&
|
||||
is_migration_disabled(p) &&
|
||||
!cpumask_test_cpu(task_cpu(p), new_mask))) {
|
||||
!cpumask_test_cpu(task_cpu(p), ctx->new_mask))) {
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
@@ -2893,22 +2923,15 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p,
|
||||
* for groups of tasks (ie. cpuset), so that load balancing is not
|
||||
* immediately required to distribute the tasks within their new mask.
|
||||
*/
|
||||
dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, new_mask);
|
||||
dest_cpu = cpumask_any_and_distribute(cpu_valid_mask, ctx->new_mask);
|
||||
if (dest_cpu >= nr_cpu_ids) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
__do_set_cpus_allowed(p, new_mask, flags);
|
||||
__do_set_cpus_allowed(p, ctx);
|
||||
|
||||
if (flags & SCA_USER)
|
||||
user_mask = clear_user_cpus_ptr(p);
|
||||
|
||||
ret = affine_move_task(rq, p, rf, dest_cpu, flags);
|
||||
|
||||
kfree(user_mask);
|
||||
|
||||
return ret;
|
||||
return affine_move_task(rq, p, rf, dest_cpu, ctx->flags);
|
||||
|
||||
out:
|
||||
task_rq_unlock(rq, p, rf);
|
||||
@@ -2926,25 +2949,41 @@ out:
|
||||
* call is not atomic; no spinlocks may be held.
|
||||
*/
|
||||
static int __set_cpus_allowed_ptr(struct task_struct *p,
|
||||
const struct cpumask *new_mask, u32 flags)
|
||||
struct affinity_context *ctx)
|
||||
{
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
|
||||
rq = task_rq_lock(p, &rf);
|
||||
return __set_cpus_allowed_ptr_locked(p, new_mask, flags, rq, &rf);
|
||||
/*
|
||||
* Masking should be skipped if SCA_USER or any of the SCA_MIGRATE_*
|
||||
* flags are set.
|
||||
*/
|
||||
if (p->user_cpus_ptr &&
|
||||
!(ctx->flags & (SCA_USER | SCA_MIGRATE_ENABLE | SCA_MIGRATE_DISABLE)) &&
|
||||
cpumask_and(rq->scratch_mask, ctx->new_mask, p->user_cpus_ptr))
|
||||
ctx->new_mask = rq->scratch_mask;
|
||||
|
||||
return __set_cpus_allowed_ptr_locked(p, ctx, rq, &rf);
|
||||
}
|
||||
|
||||
int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
|
||||
{
|
||||
return __set_cpus_allowed_ptr(p, new_mask, 0);
|
||||
struct affinity_context ac = {
|
||||
.new_mask = new_mask,
|
||||
.flags = 0,
|
||||
};
|
||||
|
||||
return __set_cpus_allowed_ptr(p, &ac);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
|
||||
|
||||
/*
|
||||
* Change a given task's CPU affinity to the intersection of its current
|
||||
* affinity mask and @subset_mask, writing the resulting mask to @new_mask
|
||||
* and pointing @p->user_cpus_ptr to a copy of the old mask.
|
||||
* affinity mask and @subset_mask, writing the resulting mask to @new_mask.
|
||||
* If user_cpus_ptr is defined, use it as the basis for restricting CPU
|
||||
* affinity or use cpu_online_mask instead.
|
||||
*
|
||||
* If the resulting mask is empty, leave the affinity unchanged and return
|
||||
* -EINVAL.
|
||||
*/
|
||||
@@ -2952,17 +2991,14 @@ static int restrict_cpus_allowed_ptr(struct task_struct *p,
|
||||
struct cpumask *new_mask,
|
||||
const struct cpumask *subset_mask)
|
||||
{
|
||||
struct cpumask *user_mask = NULL;
|
||||
struct affinity_context ac = {
|
||||
.new_mask = new_mask,
|
||||
.flags = 0,
|
||||
};
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
int err;
|
||||
|
||||
if (!p->user_cpus_ptr) {
|
||||
user_mask = kmalloc(cpumask_size(), GFP_KERNEL);
|
||||
if (!user_mask)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
rq = task_rq_lock(p, &rf);
|
||||
|
||||
/*
|
||||
@@ -2975,31 +3011,21 @@ static int restrict_cpus_allowed_ptr(struct task_struct *p,
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
if (!cpumask_and(new_mask, &p->cpus_mask, subset_mask)) {
|
||||
if (!cpumask_and(new_mask, task_user_cpus(p), subset_mask)) {
|
||||
err = -EINVAL;
|
||||
goto err_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* We're about to butcher the task affinity, so keep track of what
|
||||
* the user asked for in case we're able to restore it later on.
|
||||
*/
|
||||
if (user_mask) {
|
||||
cpumask_copy(user_mask, p->cpus_ptr);
|
||||
p->user_cpus_ptr = user_mask;
|
||||
}
|
||||
|
||||
return __set_cpus_allowed_ptr_locked(p, new_mask, 0, rq, &rf);
|
||||
return __set_cpus_allowed_ptr_locked(p, &ac, rq, &rf);
|
||||
|
||||
err_unlock:
|
||||
task_rq_unlock(rq, p, &rf);
|
||||
kfree(user_mask);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Restrict the CPU affinity of task @p so that it is a subset of
|
||||
* task_cpu_possible_mask() and point @p->user_cpu_ptr to a copy of the
|
||||
* task_cpu_possible_mask() and point @p->user_cpus_ptr to a copy of the
|
||||
* old affinity mask. If the resulting mask is empty, we warn and walk
|
||||
* up the cpuset hierarchy until we find a suitable mask.
|
||||
*/
|
||||
@@ -3043,34 +3069,29 @@ out_free_mask:
|
||||
}
|
||||
|
||||
static int
|
||||
__sched_setaffinity(struct task_struct *p, const struct cpumask *mask);
|
||||
__sched_setaffinity(struct task_struct *p, struct affinity_context *ctx);
|
||||
|
||||
/*
|
||||
* Restore the affinity of a task @p which was previously restricted by a
|
||||
* call to force_compatible_cpus_allowed_ptr(). This will clear (and free)
|
||||
* @p->user_cpus_ptr.
|
||||
* call to force_compatible_cpus_allowed_ptr().
|
||||
*
|
||||
* It is the caller's responsibility to serialise this with any calls to
|
||||
* force_compatible_cpus_allowed_ptr(@p).
|
||||
*/
|
||||
void relax_compatible_cpus_allowed_ptr(struct task_struct *p)
|
||||
{
|
||||
struct cpumask *user_mask = p->user_cpus_ptr;
|
||||
unsigned long flags;
|
||||
struct affinity_context ac = {
|
||||
.new_mask = task_user_cpus(p),
|
||||
.flags = 0,
|
||||
};
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Try to restore the old affinity mask. If this fails, then
|
||||
* we free the mask explicitly to avoid it being inherited across
|
||||
* a subsequent fork().
|
||||
* Try to restore the old affinity mask with __sched_setaffinity().
|
||||
* Cpuset masking will be done there too.
|
||||
*/
|
||||
if (!user_mask || !__sched_setaffinity(p, user_mask))
|
||||
return;
|
||||
|
||||
raw_spin_lock_irqsave(&p->pi_lock, flags);
|
||||
user_mask = clear_user_cpus_ptr(p);
|
||||
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
|
||||
|
||||
kfree(user_mask);
|
||||
ret = __sched_setaffinity(p, &ac);
|
||||
WARN_ON_ONCE(ret);
|
||||
}
|
||||
|
||||
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
|
||||
@@ -3548,10 +3569,9 @@ void sched_set_stop_task(int cpu, struct task_struct *stop)
|
||||
#else /* CONFIG_SMP */
|
||||
|
||||
static inline int __set_cpus_allowed_ptr(struct task_struct *p,
|
||||
const struct cpumask *new_mask,
|
||||
u32 flags)
|
||||
struct affinity_context *ctx)
|
||||
{
|
||||
return set_cpus_allowed_ptr(p, new_mask);
|
||||
return set_cpus_allowed_ptr(p, ctx->new_mask);
|
||||
}
|
||||
|
||||
static inline void migrate_disable_switch(struct rq *rq, struct task_struct *p) { }
|
||||
@@ -3719,13 +3739,6 @@ void sched_ttwu_pending(void *arg)
|
||||
if (!llist)
|
||||
return;
|
||||
|
||||
/*
|
||||
* rq::ttwu_pending racy indication of out-standing wakeups.
|
||||
* Races such that false-negatives are possible, since they
|
||||
* are shorter lived that false-positives would be.
|
||||
*/
|
||||
WRITE_ONCE(rq->ttwu_pending, 0);
|
||||
|
||||
rq_lock_irqsave(rq, &rf);
|
||||
update_rq_clock(rq);
|
||||
|
||||
@@ -3739,6 +3752,17 @@ void sched_ttwu_pending(void *arg)
|
||||
ttwu_do_activate(rq, p, p->sched_remote_wakeup ? WF_MIGRATED : 0, &rf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be after enqueueing at least one task such that
|
||||
* idle_cpu() does not observe a false-negative -- if it does,
|
||||
* it is possible for select_idle_siblings() to stack a number
|
||||
* of tasks on this CPU during that window.
|
||||
*
|
||||
* It is ok to clear ttwu_pending when another task is pending.
|
||||
* We will receive an IPI after local IRQs are enabled and then enqueue it.
|
||||
* Since now nr_running > 0, idle_cpu() will always get correct result.
|
||||
*/
|
||||
WRITE_ONCE(rq->ttwu_pending, 0);
|
||||
rq_unlock_irqrestore(rq, &rf);
|
||||
}
|
||||
|
||||
@@ -8106,7 +8130,7 @@ int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask)
|
||||
#endif
|
||||
|
||||
static int
|
||||
__sched_setaffinity(struct task_struct *p, const struct cpumask *mask)
|
||||
__sched_setaffinity(struct task_struct *p, struct affinity_context *ctx)
|
||||
{
|
||||
int retval;
|
||||
cpumask_var_t cpus_allowed, new_mask;
|
||||
@@ -8120,13 +8144,16 @@ __sched_setaffinity(struct task_struct *p, const struct cpumask *mask)
|
||||
}
|
||||
|
||||
cpuset_cpus_allowed(p, cpus_allowed);
|
||||
cpumask_and(new_mask, mask, cpus_allowed);
|
||||
cpumask_and(new_mask, ctx->new_mask, cpus_allowed);
|
||||
|
||||
ctx->new_mask = new_mask;
|
||||
ctx->flags |= SCA_CHECK;
|
||||
|
||||
retval = dl_task_check_affinity(p, new_mask);
|
||||
if (retval)
|
||||
goto out_free_new_mask;
|
||||
again:
|
||||
retval = __set_cpus_allowed_ptr(p, new_mask, SCA_CHECK | SCA_USER);
|
||||
|
||||
retval = __set_cpus_allowed_ptr(p, ctx);
|
||||
if (retval)
|
||||
goto out_free_new_mask;
|
||||
|
||||
@@ -8137,7 +8164,24 @@ again:
|
||||
* Just reset the cpumask to the cpuset's cpus_allowed.
|
||||
*/
|
||||
cpumask_copy(new_mask, cpus_allowed);
|
||||
goto again;
|
||||
|
||||
/*
|
||||
* If SCA_USER is set, a 2nd call to __set_cpus_allowed_ptr()
|
||||
* will restore the previous user_cpus_ptr value.
|
||||
*
|
||||
* In the unlikely event a previous user_cpus_ptr exists,
|
||||
* we need to further restrict the mask to what is allowed
|
||||
* by that old user_cpus_ptr.
|
||||
*/
|
||||
if (unlikely((ctx->flags & SCA_USER) && ctx->user_mask)) {
|
||||
bool empty = !cpumask_and(new_mask, new_mask,
|
||||
ctx->user_mask);
|
||||
|
||||
if (WARN_ON_ONCE(empty))
|
||||
cpumask_copy(new_mask, cpus_allowed);
|
||||
}
|
||||
__set_cpus_allowed_ptr(p, ctx);
|
||||
retval = -EINVAL;
|
||||
}
|
||||
|
||||
out_free_new_mask:
|
||||
@@ -8149,6 +8193,8 @@ out_free_cpus_allowed:
|
||||
|
||||
long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
|
||||
{
|
||||
struct affinity_context ac;
|
||||
struct cpumask *user_mask;
|
||||
struct task_struct *p;
|
||||
int retval;
|
||||
|
||||
@@ -8183,7 +8229,21 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
|
||||
if (retval)
|
||||
goto out_put_task;
|
||||
|
||||
retval = __sched_setaffinity(p, in_mask);
|
||||
user_mask = kmalloc(cpumask_size(), GFP_KERNEL);
|
||||
if (!user_mask) {
|
||||
retval = -ENOMEM;
|
||||
goto out_put_task;
|
||||
}
|
||||
cpumask_copy(user_mask, in_mask);
|
||||
ac = (struct affinity_context){
|
||||
.new_mask = in_mask,
|
||||
.user_mask = user_mask,
|
||||
.flags = SCA_USER,
|
||||
};
|
||||
|
||||
retval = __sched_setaffinity(p, &ac);
|
||||
kfree(ac.user_mask);
|
||||
|
||||
out_put_task:
|
||||
put_task_struct(p);
|
||||
return retval;
|
||||
@@ -8964,6 +9024,12 @@ void show_state_filter(unsigned int state_filter)
|
||||
*/
|
||||
void __init init_idle(struct task_struct *idle, int cpu)
|
||||
{
|
||||
#ifdef CONFIG_SMP
|
||||
struct affinity_context ac = (struct affinity_context) {
|
||||
.new_mask = cpumask_of(cpu),
|
||||
.flags = 0,
|
||||
};
|
||||
#endif
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
unsigned long flags;
|
||||
|
||||
@@ -8988,7 +9054,7 @@ void __init init_idle(struct task_struct *idle, int cpu)
|
||||
*
|
||||
* And since this is boot we can forgo the serialization.
|
||||
*/
|
||||
set_cpus_allowed_common(idle, cpumask_of(cpu), 0);
|
||||
set_cpus_allowed_common(idle, &ac);
|
||||
#endif
|
||||
/*
|
||||
* We're having a chicken and egg problem, even though we are
|
||||
@@ -9775,6 +9841,7 @@ void __init sched_init(void)
|
||||
|
||||
rq->core_cookie = 0UL;
|
||||
#endif
|
||||
zalloc_cpumask_var_node(&rq->scratch_mask, GFP_KERNEL, cpu_to_node(i));
|
||||
}
|
||||
|
||||
set_load_weight(&init_task, false);
|
||||
|
||||
@@ -2485,8 +2485,7 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
|
||||
}
|
||||
|
||||
static void set_cpus_allowed_dl(struct task_struct *p,
|
||||
const struct cpumask *new_mask,
|
||||
u32 flags)
|
||||
struct affinity_context *ctx)
|
||||
{
|
||||
struct root_domain *src_rd;
|
||||
struct rq *rq;
|
||||
@@ -2501,7 +2500,7 @@ static void set_cpus_allowed_dl(struct task_struct *p,
|
||||
* update. We already made space for us in the destination
|
||||
* domain (see cpuset_can_attach()).
|
||||
*/
|
||||
if (!cpumask_intersects(src_rd->span, new_mask)) {
|
||||
if (!cpumask_intersects(src_rd->span, ctx->new_mask)) {
|
||||
struct dl_bw *src_dl_b;
|
||||
|
||||
src_dl_b = dl_bw_of(cpu_of(rq));
|
||||
@@ -2515,7 +2514,7 @@ static void set_cpus_allowed_dl(struct task_struct *p,
|
||||
raw_spin_unlock(&src_dl_b->lock);
|
||||
}
|
||||
|
||||
set_cpus_allowed_common(p, new_mask, flags);
|
||||
set_cpus_allowed_common(p, ctx);
|
||||
}
|
||||
|
||||
/* Assumes rq->lock is held */
|
||||
|
||||
+264
-39
@@ -4280,14 +4280,16 @@ static inline unsigned long task_util_est(struct task_struct *p)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
static inline unsigned long uclamp_task_util(struct task_struct *p)
|
||||
static inline unsigned long uclamp_task_util(struct task_struct *p,
|
||||
unsigned long uclamp_min,
|
||||
unsigned long uclamp_max)
|
||||
{
|
||||
return clamp(task_util_est(p),
|
||||
uclamp_eff_value(p, UCLAMP_MIN),
|
||||
uclamp_eff_value(p, UCLAMP_MAX));
|
||||
return clamp(task_util_est(p), uclamp_min, uclamp_max);
|
||||
}
|
||||
#else
|
||||
static inline unsigned long uclamp_task_util(struct task_struct *p)
|
||||
static inline unsigned long uclamp_task_util(struct task_struct *p,
|
||||
unsigned long uclamp_min,
|
||||
unsigned long uclamp_max)
|
||||
{
|
||||
return task_util_est(p);
|
||||
}
|
||||
@@ -4426,10 +4428,139 @@ done:
|
||||
trace_sched_util_est_se_tp(&p->se);
|
||||
}
|
||||
|
||||
static inline int task_fits_capacity(struct task_struct *p,
|
||||
unsigned long capacity)
|
||||
static inline int util_fits_cpu(unsigned long util,
|
||||
unsigned long uclamp_min,
|
||||
unsigned long uclamp_max,
|
||||
int cpu)
|
||||
{
|
||||
return fits_capacity(uclamp_task_util(p), capacity);
|
||||
unsigned long capacity_orig, capacity_orig_thermal;
|
||||
unsigned long capacity = capacity_of(cpu);
|
||||
bool fits, uclamp_max_fits;
|
||||
|
||||
/*
|
||||
* Check if the real util fits without any uclamp boost/cap applied.
|
||||
*/
|
||||
fits = fits_capacity(util, capacity);
|
||||
|
||||
if (!uclamp_is_used())
|
||||
return fits;
|
||||
|
||||
/*
|
||||
* We must use capacity_orig_of() for comparing against uclamp_min and
|
||||
* uclamp_max. We only care about capacity pressure (by using
|
||||
* capacity_of()) for comparing against the real util.
|
||||
*
|
||||
* If a task is boosted to 1024 for example, we don't want a tiny
|
||||
* pressure to skew the check whether it fits a CPU or not.
|
||||
*
|
||||
* Similarly if a task is capped to capacity_orig_of(little_cpu), it
|
||||
* should fit a little cpu even if there's some pressure.
|
||||
*
|
||||
* Only exception is for thermal pressure since it has a direct impact
|
||||
* on available OPP of the system.
|
||||
*
|
||||
* We honour it for uclamp_min only as a drop in performance level
|
||||
* could result in not getting the requested minimum performance level.
|
||||
*
|
||||
* For uclamp_max, we can tolerate a drop in performance level as the
|
||||
* goal is to cap the task. So it's okay if it's getting less.
|
||||
*
|
||||
* In case of capacity inversion we should honour the inverted capacity
|
||||
* for both uclamp_min and uclamp_max all the time.
|
||||
*/
|
||||
capacity_orig = cpu_in_capacity_inversion(cpu);
|
||||
if (capacity_orig) {
|
||||
capacity_orig_thermal = capacity_orig;
|
||||
} else {
|
||||
capacity_orig = capacity_orig_of(cpu);
|
||||
capacity_orig_thermal = capacity_orig - arch_scale_thermal_pressure(cpu);
|
||||
}
|
||||
|
||||
/*
|
||||
* We want to force a task to fit a cpu as implied by uclamp_max.
|
||||
* But we do have some corner cases to cater for..
|
||||
*
|
||||
*
|
||||
*                                 C=z
|
||||
*   |                             ___
|
||||
*   |                  C=y       |   |
|
||||
*   |_ _ _ _ _ _ _ _ _ ___ _ _ _ | _ | _ _ _ _ _  uclamp_max
|
||||
*   |      C=x        |   |      |   |
|
||||
*   |      ___        |   |      |   |
|
||||
*   |     |   |       |   |      |   |    (util somewhere in this region)
|
||||
*   |     |   |       |   |      |   |
|
||||
*   |     |   |       |   |      |   |
|
||||
*   +----------------------------------------
|
||||
*         cpu0        cpu1       cpu2
|
||||
*
|
||||
* In the above example if a task is capped to a specific performance
|
||||
* point, y, then when:
|
||||
*
|
||||
* * util = 80% of x then it does not fit on cpu0 and should migrate
|
||||
* to cpu1
|
||||
* * util = 80% of y then it is forced to fit on cpu1 to honour
|
||||
* uclamp_max request.
|
||||
*
|
||||
* which is what we're enforcing here. A task always fits if
|
||||
* uclamp_max <= capacity_orig. But when uclamp_max > capacity_orig,
|
||||
* the normal upmigration rules should still hold.
|
||||
*
|
||||
* Only exception is when we are on max capacity, then we need to be
|
||||
* careful not to block overutilized state. This is so because:
|
||||
*
|
||||
* 1. There's no concept of capping at max_capacity! We can't go
|
||||
* beyond this performance level anyway.
|
||||
* 2. The system is being saturated when we're operating near
|
||||
* max capacity, it doesn't make sense to block overutilized.
|
||||
*/
|
||||
uclamp_max_fits = (capacity_orig == SCHED_CAPACITY_SCALE) && (uclamp_max == SCHED_CAPACITY_SCALE);
|
||||
uclamp_max_fits = !uclamp_max_fits && (uclamp_max <= capacity_orig);
|
||||
fits = fits || uclamp_max_fits;
|
||||
|
||||
/*
|
||||
*
|
||||
*                                 C=z
|
||||
*   |                             ___       (region a, capped, util >= uclamp_max)
|
||||
*   |                  C=y       |   |
|
||||
*   |_ _ _ _ _ _ _ _ _ ___ _ _ _ | _ | _ _ _ _ _  uclamp_max
|
||||
*   |      C=x        |   |      |   |
|
||||
*   |      ___        |   |      |   |      (region b, uclamp_min <= util <= uclamp_max)
|
||||
*   |_ _ _|_ _|_ _ _ _| _ | _ _ _| _ | _ _ _ _ _  uclamp_min
|
||||
*   |     |   |       |   |      |   |
|
||||
*   |     |   |       |   |      |   |      (region c, boosted, util < uclamp_min)
|
||||
*   +----------------------------------------
|
||||
*         cpu0        cpu1       cpu2
|
||||
*
|
||||
* a) If util > uclamp_max, then we're capped, we don't care about
|
||||
* actual fitness value here. We only care if uclamp_max fits
|
||||
* capacity without taking margin/pressure into account.
|
||||
* See comment above.
|
||||
*
|
||||
* b) If uclamp_min <= util <= uclamp_max, then the normal
|
||||
* fits_capacity() rules apply. Except we need to ensure that we
|
||||
* enforce we remain within uclamp_max, see comment above.
|
||||
*
|
||||
* c) If util < uclamp_min, then we are boosted. Same as (b) but we
|
||||
* need to take into account the boosted value fits the CPU without
|
||||
* taking margin/pressure into account.
|
||||
*
|
||||
* Cases (a) and (b) are handled in the 'fits' variable already. We
|
||||
* just need to consider an extra check for case (c) after ensuring we
|
||||
* handle the case uclamp_min > uclamp_max.
|
||||
*/
|
||||
uclamp_min = min(uclamp_min, uclamp_max);
|
||||
if (util < uclamp_min && capacity_orig != SCHED_CAPACITY_SCALE)
|
||||
fits = fits && (uclamp_min <= capacity_orig_thermal);
|
||||
|
||||
return fits;
|
||||
}
|
||||
|
||||
static inline int task_fits_cpu(struct task_struct *p, int cpu)
|
||||
{
|
||||
unsigned long uclamp_min = uclamp_eff_value(p, UCLAMP_MIN);
|
||||
unsigned long uclamp_max = uclamp_eff_value(p, UCLAMP_MAX);
|
||||
unsigned long util = task_util_est(p);
|
||||
return util_fits_cpu(util, uclamp_min, uclamp_max, cpu);
|
||||
}
|
||||
|
||||
static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
|
||||
@@ -4442,7 +4573,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
|
||||
return;
|
||||
}
|
||||
|
||||
if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) {
|
||||
if (task_fits_cpu(p, cpu_of(rq))) {
|
||||
rq->misfit_task_load = 0;
|
||||
return;
|
||||
}
|
||||
@@ -5862,7 +5993,10 @@ static inline void hrtick_update(struct rq *rq)
|
||||
#ifdef CONFIG_SMP
|
||||
static inline bool cpu_overutilized(int cpu)
|
||||
{
|
||||
return !fits_capacity(cpu_util_cfs(cpu), capacity_of(cpu));
|
||||
unsigned long rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN);
|
||||
unsigned long rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX);
|
||||
|
||||
return !util_fits_cpu(cpu_util_cfs(cpu), rq_util_min, rq_util_max, cpu);
|
||||
}
|
||||
|
||||
static inline void update_overutilized_status(struct rq *rq)
|
||||
@@ -6654,21 +6788,23 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
|
||||
static int
|
||||
select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
|
||||
{
|
||||
unsigned long task_util, best_cap = 0;
|
||||
unsigned long task_util, util_min, util_max, best_cap = 0;
|
||||
int cpu, best_cpu = -1;
|
||||
struct cpumask *cpus;
|
||||
|
||||
cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
|
||||
cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
|
||||
|
||||
task_util = uclamp_task_util(p);
|
||||
task_util = task_util_est(p);
|
||||
util_min = uclamp_eff_value(p, UCLAMP_MIN);
|
||||
util_max = uclamp_eff_value(p, UCLAMP_MAX);
|
||||
|
||||
for_each_cpu_wrap(cpu, cpus, target) {
|
||||
unsigned long cpu_cap = capacity_of(cpu);
|
||||
|
||||
if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
|
||||
continue;
|
||||
if (fits_capacity(task_util, cpu_cap))
|
||||
if (util_fits_cpu(task_util, util_min, util_max, cpu))
|
||||
return cpu;
|
||||
|
||||
if (cpu_cap > best_cap) {
|
||||
@@ -6680,10 +6816,13 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
|
||||
return best_cpu;
|
||||
}
|
||||
|
||||
static inline bool asym_fits_capacity(unsigned long task_util, int cpu)
|
||||
static inline bool asym_fits_cpu(unsigned long util,
|
||||
unsigned long util_min,
|
||||
unsigned long util_max,
|
||||
int cpu)
|
||||
{
|
||||
if (sched_asym_cpucap_active())
|
||||
return fits_capacity(task_util, capacity_of(cpu));
|
||||
return util_fits_cpu(util, util_min, util_max, cpu);
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -6695,7 +6834,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
{
|
||||
bool has_idle_core = false;
|
||||
struct sched_domain *sd;
|
||||
unsigned long task_util;
|
||||
unsigned long task_util, util_min, util_max;
|
||||
int i, recent_used_cpu;
|
||||
|
||||
/*
|
||||
@@ -6704,7 +6843,9 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
*/
|
||||
if (sched_asym_cpucap_active()) {
|
||||
sync_entity_load_avg(&p->se);
|
||||
task_util = uclamp_task_util(p);
|
||||
task_util = task_util_est(p);
|
||||
util_min = uclamp_eff_value(p, UCLAMP_MIN);
|
||||
util_max = uclamp_eff_value(p, UCLAMP_MAX);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -6713,7 +6854,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
|
||||
asym_fits_capacity(task_util, target))
|
||||
asym_fits_cpu(task_util, util_min, util_max, target))
|
||||
return target;
|
||||
|
||||
/*
|
||||
@@ -6721,7 +6862,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
*/
|
||||
if (prev != target && cpus_share_cache(prev, target) &&
|
||||
(available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
|
||||
asym_fits_capacity(task_util, prev))
|
||||
asym_fits_cpu(task_util, util_min, util_max, prev))
|
||||
return prev;
|
||||
|
||||
/*
|
||||
@@ -6736,7 +6877,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
in_task() &&
|
||||
prev == smp_processor_id() &&
|
||||
this_rq()->nr_running <= 1 &&
|
||||
asym_fits_capacity(task_util, prev)) {
|
||||
asym_fits_cpu(task_util, util_min, util_max, prev)) {
|
||||
return prev;
|
||||
}
|
||||
|
||||
@@ -6748,7 +6889,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
|
||||
cpus_share_cache(recent_used_cpu, target) &&
|
||||
(available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
|
||||
cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
|
||||
asym_fits_capacity(task_util, recent_used_cpu)) {
|
||||
asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) {
|
||||
return recent_used_cpu;
|
||||
}
|
||||
|
||||
@@ -7044,6 +7185,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
|
||||
{
|
||||
struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
|
||||
unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
|
||||
unsigned long p_util_min = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MIN) : 0;
|
||||
unsigned long p_util_max = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MAX) : 1024;
|
||||
struct root_domain *rd = this_rq()->rd;
|
||||
int cpu, best_energy_cpu, target = -1;
|
||||
struct sched_domain *sd;
|
||||
@@ -7068,7 +7211,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
|
||||
target = prev_cpu;
|
||||
|
||||
sync_entity_load_avg(&p->se);
|
||||
if (!task_util_est(p))
|
||||
if (!uclamp_task_util(p, p_util_min, p_util_max))
|
||||
goto unlock;
|
||||
|
||||
eenv_task_busy_time(&eenv, p, prev_cpu);
|
||||
@@ -7076,7 +7219,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
|
||||
for (; pd; pd = pd->next) {
|
||||
unsigned long cpu_cap, cpu_thermal_cap, util;
|
||||
unsigned long cur_delta, max_spare_cap = 0;
|
||||
bool compute_prev_delta = false;
|
||||
unsigned long rq_util_min, rq_util_max;
|
||||
unsigned long util_min, util_max;
|
||||
unsigned long prev_spare_cap = 0;
|
||||
int max_spare_cap_cpu = -1;
|
||||
unsigned long base_energy;
|
||||
|
||||
@@ -7112,26 +7257,45 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
|
||||
* much capacity we can get out of the CPU; this is
|
||||
* aligned with sched_cpu_util().
|
||||
*/
|
||||
util = uclamp_rq_util_with(cpu_rq(cpu), util, p);
|
||||
if (!fits_capacity(util, cpu_cap))
|
||||
if (uclamp_is_used()) {
|
||||
if (uclamp_rq_is_idle(cpu_rq(cpu))) {
|
||||
util_min = p_util_min;
|
||||
util_max = p_util_max;
|
||||
} else {
|
||||
/*
|
||||
* Open code uclamp_rq_util_with() except for
|
||||
* the clamp() part. Ie: apply max aggregation
|
||||
* only. util_fits_cpu() logic requires to
|
||||
* operate on non clamped util but must use the
|
||||
* max-aggregated uclamp_{min, max}.
|
||||
*/
|
||||
rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN);
|
||||
rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX);
|
||||
|
||||
util_min = max(rq_util_min, p_util_min);
|
||||
util_max = max(rq_util_max, p_util_max);
|
||||
}
|
||||
}
|
||||
if (!util_fits_cpu(util, util_min, util_max, cpu))
|
||||
continue;
|
||||
|
||||
lsub_positive(&cpu_cap, util);
|
||||
|
||||
if (cpu == prev_cpu) {
|
||||
/* Always use prev_cpu as a candidate. */
|
||||
compute_prev_delta = true;
|
||||
prev_spare_cap = cpu_cap;
|
||||
} else if (cpu_cap > max_spare_cap) {
|
||||
/*
|
||||
* Find the CPU with the maximum spare capacity
|
||||
* in the performance domain.
|
||||
* among the remaining CPUs in the performance
|
||||
* domain.
|
||||
*/
|
||||
max_spare_cap = cpu_cap;
|
||||
max_spare_cap_cpu = cpu;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_spare_cap_cpu < 0 && !compute_prev_delta)
|
||||
if (max_spare_cap_cpu < 0 && prev_spare_cap == 0)
|
||||
continue;
|
||||
|
||||
eenv_pd_busy_time(&eenv, cpus, p);
|
||||
@@ -7139,7 +7303,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
|
||||
base_energy = compute_energy(&eenv, pd, cpus, p, -1);
|
||||
|
||||
/* Evaluate the energy impact of using prev_cpu. */
|
||||
if (compute_prev_delta) {
|
||||
if (prev_spare_cap > 0) {
|
||||
prev_delta = compute_energy(&eenv, pd, cpus, p,
|
||||
prev_cpu);
|
||||
/* CPU utilization has changed */
|
||||
@@ -7150,7 +7314,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
|
||||
}
|
||||
|
||||
/* Evaluate the energy impact of using max_spare_cap_cpu. */
|
||||
if (max_spare_cap_cpu >= 0) {
|
||||
if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
|
||||
cur_delta = compute_energy(&eenv, pd, cpus, p,
|
||||
max_spare_cap_cpu);
|
||||
/* CPU utilization has changed */
|
||||
@@ -8276,7 +8440,7 @@ static int detach_tasks(struct lb_env *env)
|
||||
|
||||
case migrate_misfit:
|
||||
/* This is not a misfit task */
|
||||
if (task_fits_capacity(p, capacity_of(env->src_cpu)))
|
||||
if (task_fits_cpu(p, env->src_cpu))
|
||||
goto next;
|
||||
|
||||
env->imbalance = 0;
|
||||
@@ -8665,16 +8829,73 @@ static unsigned long scale_rt_capacity(int cpu)
|
||||
|
||||
static void update_cpu_capacity(struct sched_domain *sd, int cpu)
|
||||
{
|
||||
unsigned long capacity_orig = arch_scale_cpu_capacity(cpu);
|
||||
unsigned long capacity = scale_rt_capacity(cpu);
|
||||
struct sched_group *sdg = sd->groups;
|
||||
struct rq *rq = cpu_rq(cpu);
|
||||
|
||||
cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu);
|
||||
rq->cpu_capacity_orig = capacity_orig;
|
||||
|
||||
if (!capacity)
|
||||
capacity = 1;
|
||||
|
||||
cpu_rq(cpu)->cpu_capacity = capacity;
|
||||
trace_sched_cpu_capacity_tp(cpu_rq(cpu));
|
||||
rq->cpu_capacity = capacity;
|
||||
|
||||
/*
|
||||
* Detect if the performance domain is in capacity inversion state.
|
||||
*
|
||||
* Capacity inversion happens when another perf domain with equal or
|
||||
* lower capacity_orig_of() ends up having higher capacity than this
|
||||
* domain after subtracting thermal pressure.
|
||||
*
|
||||
* We only take into account thermal pressure in this detection as it's
|
||||
* the only metric that actually results in *real* reduction of
|
||||
* capacity due to performance points (OPPs) being dropped/become
|
||||
* unreachable due to thermal throttling.
|
||||
*
|
||||
* We assume:
|
||||
* * That all cpus in a perf domain have the same capacity_orig
|
||||
* (same uArch).
|
||||
* * Thermal pressure will impact all cpus in this perf domain
|
||||
* equally.
|
||||
*/
|
||||
if (static_branch_unlikely(&sched_asym_cpucapacity)) {
|
||||
unsigned long inv_cap = capacity_orig - thermal_load_avg(rq);
|
||||
struct perf_domain *pd = rcu_dereference(rq->rd->pd);
|
||||
|
||||
rq->cpu_capacity_inverted = 0;
|
||||
|
||||
for (; pd; pd = pd->next) {
|
||||
struct cpumask *pd_span = perf_domain_span(pd);
|
||||
unsigned long pd_cap_orig, pd_cap;
|
||||
|
||||
cpu = cpumask_any(pd_span);
|
||||
pd_cap_orig = arch_scale_cpu_capacity(cpu);
|
||||
|
||||
if (capacity_orig < pd_cap_orig)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* handle the case of multiple perf domains have the
|
||||
* same capacity_orig but one of them is under higher
|
||||
* thermal pressure. We record it as capacity
|
||||
* inversion.
|
||||
*/
|
||||
if (capacity_orig == pd_cap_orig) {
|
||||
pd_cap = pd_cap_orig - thermal_load_avg(cpu_rq(cpu));
|
||||
|
||||
if (pd_cap > inv_cap) {
|
||||
rq->cpu_capacity_inverted = inv_cap;
|
||||
break;
|
||||
}
|
||||
} else if (pd_cap_orig > inv_cap) {
|
||||
rq->cpu_capacity_inverted = inv_cap;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
trace_sched_cpu_capacity_tp(rq);
|
||||
|
||||
sdg->sgc->capacity = capacity;
|
||||
sdg->sgc->min_capacity = capacity;
|
||||
@@ -9281,6 +9502,10 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
|
||||
|
||||
memset(sgs, 0, sizeof(*sgs));
|
||||
|
||||
/* Assume that task can't fit any CPU of the group */
|
||||
if (sd->flags & SD_ASYM_CPUCAPACITY)
|
||||
sgs->group_misfit_task_load = 1;
|
||||
|
||||
for_each_cpu(i, sched_group_span(group)) {
|
||||
struct rq *rq = cpu_rq(i);
|
||||
unsigned int local;
|
||||
@@ -9300,12 +9525,12 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
|
||||
if (!nr_running && idle_cpu_without(i, p))
|
||||
sgs->idle_cpus++;
|
||||
|
||||
}
|
||||
/* Check if task fits in the CPU */
|
||||
if (sd->flags & SD_ASYM_CPUCAPACITY &&
|
||||
sgs->group_misfit_task_load &&
|
||||
task_fits_cpu(p, i))
|
||||
sgs->group_misfit_task_load = 0;
|
||||
|
||||
/* Check if task fits in the group */
|
||||
if (sd->flags & SD_ASYM_CPUCAPACITY &&
|
||||
!task_fits_capacity(p, group->sgc->max_capacity)) {
|
||||
sgs->group_misfit_task_load = 1;
|
||||
}
|
||||
|
||||
sgs->group_capacity = group->sgc->capacity;
|
||||
|
||||
+84
-16
@@ -189,6 +189,7 @@ static void group_init(struct psi_group *group)
|
||||
INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
|
||||
mutex_init(&group->avgs_lock);
|
||||
/* Init trigger-related members */
|
||||
atomic_set(&group->poll_scheduled, 0);
|
||||
mutex_init(&group->trigger_lock);
|
||||
INIT_LIST_HEAD(&group->triggers);
|
||||
group->poll_min_period = U32_MAX;
|
||||
@@ -242,6 +243,8 @@ static void get_recent_times(struct psi_group *group, int cpu,
|
||||
u32 *pchanged_states)
|
||||
{
|
||||
struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
|
||||
int current_cpu = raw_smp_processor_id();
|
||||
unsigned int tasks[NR_PSI_TASK_COUNTS];
|
||||
u64 now, state_start;
|
||||
enum psi_states s;
|
||||
unsigned int seq;
|
||||
@@ -256,6 +259,8 @@ static void get_recent_times(struct psi_group *group, int cpu,
|
||||
memcpy(times, groupc->times, sizeof(groupc->times));
|
||||
state_mask = groupc->state_mask;
|
||||
state_start = groupc->state_start;
|
||||
if (cpu == current_cpu)
|
||||
memcpy(tasks, groupc->tasks, sizeof(groupc->tasks));
|
||||
} while (read_seqcount_retry(&groupc->seq, seq));
|
||||
|
||||
/* Calculate state time deltas against the previous snapshot */
|
||||
@@ -280,6 +285,28 @@ static void get_recent_times(struct psi_group *group, int cpu,
|
||||
if (delta)
|
||||
*pchanged_states |= (1 << s);
|
||||
}
|
||||
|
||||
/*
|
||||
* When collect_percpu_times() from the avgs_work, we don't want to
|
||||
* re-arm avgs_work when all CPUs are IDLE. But the current CPU running
|
||||
* this avgs_work is never IDLE, cause avgs_work can't be shut off.
|
||||
* So for the current CPU, we need to re-arm avgs_work only when
|
||||
* (NR_RUNNING > 1 || NR_IOWAIT > 0 || NR_MEMSTALL > 0), for other CPUs
|
||||
* we can just check PSI_NONIDLE delta.
|
||||
*/
|
||||
if (current_work() == &group->avgs_work.work) {
|
||||
bool reschedule;
|
||||
|
||||
if (cpu == current_cpu)
|
||||
reschedule = tasks[NR_RUNNING] +
|
||||
tasks[NR_IOWAIT] +
|
||||
tasks[NR_MEMSTALL] > 1;
|
||||
else
|
||||
reschedule = *pchanged_states & (1 << PSI_NONIDLE);
|
||||
|
||||
if (reschedule)
|
||||
*pchanged_states |= PSI_STATE_RESCHEDULE;
|
||||
}
|
||||
}
|
||||
|
||||
static void calc_avgs(unsigned long avg[3], int missed_periods,
|
||||
@@ -415,7 +442,6 @@ static void psi_avgs_work(struct work_struct *work)
|
||||
struct delayed_work *dwork;
|
||||
struct psi_group *group;
|
||||
u32 changed_states;
|
||||
bool nonidle;
|
||||
u64 now;
|
||||
|
||||
dwork = to_delayed_work(work);
|
||||
@@ -426,7 +452,6 @@ static void psi_avgs_work(struct work_struct *work)
|
||||
now = sched_clock();
|
||||
|
||||
collect_percpu_times(group, PSI_AVGS, &changed_states);
|
||||
nonidle = changed_states & (1 << PSI_NONIDLE);
|
||||
/*
|
||||
* If there is task activity, periodically fold the per-cpu
|
||||
* times and feed samples into the running averages. If things
|
||||
@@ -437,7 +462,7 @@ static void psi_avgs_work(struct work_struct *work)
|
||||
if (now >= group->avg_next_update)
|
||||
group->avg_next_update = update_averages(group, now);
|
||||
|
||||
if (nonidle) {
|
||||
if (changed_states & PSI_STATE_RESCHEDULE) {
|
||||
schedule_delayed_work(dwork, nsecs_to_jiffies(
|
||||
group->avg_next_update - now) + 1);
|
||||
}
|
||||
@@ -539,10 +564,12 @@ static u64 update_triggers(struct psi_group *group, u64 now)
|
||||
|
||||
/* Calculate growth since last update */
|
||||
growth = window_update(&t->win, now, total[t->state]);
|
||||
if (growth < t->threshold)
|
||||
continue;
|
||||
if (!t->pending_event) {
|
||||
if (growth < t->threshold)
|
||||
continue;
|
||||
|
||||
t->pending_event = true;
|
||||
t->pending_event = true;
|
||||
}
|
||||
}
|
||||
/* Limit event signaling to once per window */
|
||||
if (now < t->last_event_time + t->win.size)
|
||||
@@ -563,18 +590,17 @@ static u64 update_triggers(struct psi_group *group, u64 now)
|
||||
return now + group->poll_min_period;
|
||||
}
|
||||
|
||||
/* Schedule polling if it's not already scheduled. */
|
||||
static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay)
|
||||
/* Schedule polling if it's not already scheduled or forced. */
|
||||
static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay,
|
||||
bool force)
|
||||
{
|
||||
struct task_struct *task;
|
||||
|
||||
/*
|
||||
* Do not reschedule if already scheduled.
|
||||
* Possible race with a timer scheduled after this check but before
|
||||
* mod_timer below can be tolerated because group->polling_next_update
|
||||
* will keep updates on schedule.
|
||||
* atomic_xchg should be called even when !force to provide a
|
||||
* full memory barrier (see the comment inside psi_poll_work).
|
||||
*/
|
||||
if (timer_pending(&group->poll_timer))
|
||||
if (atomic_xchg(&group->poll_scheduled, 1) && !force)
|
||||
return;
|
||||
|
||||
rcu_read_lock();
|
||||
@@ -586,12 +612,15 @@ static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay)
|
||||
*/
|
||||
if (likely(task))
|
||||
mod_timer(&group->poll_timer, jiffies + delay);
|
||||
else
|
||||
atomic_set(&group->poll_scheduled, 0);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void psi_poll_work(struct psi_group *group)
|
||||
{
|
||||
bool force_reschedule = false;
|
||||
u32 changed_states;
|
||||
u64 now;
|
||||
|
||||
@@ -599,6 +628,43 @@ static void psi_poll_work(struct psi_group *group)
|
||||
|
||||
now = sched_clock();
|
||||
|
||||
if (now > group->polling_until) {
|
||||
/*
|
||||
* We are either about to start or might stop polling if no
|
||||
* state change was recorded. Resetting poll_scheduled leaves
|
||||
* a small window for psi_group_change to sneak in and schedule
|
||||
* an immediate poll_work before we get to rescheduling. One
|
||||
* potential extra wakeup at the end of the polling window
|
||||
* should be negligible and polling_next_update still keeps
|
||||
* updates correctly on schedule.
|
||||
*/
|
||||
atomic_set(&group->poll_scheduled, 0);
|
||||
/*
|
||||
* A task change can race with the poll worker that is supposed to
|
||||
* report on it. To avoid missing events, ensure ordering between
|
||||
* poll_scheduled and the task state accesses, such that if the poll
|
||||
* worker misses the state update, the task change is guaranteed to
|
||||
* reschedule the poll worker:
|
||||
*
|
||||
* poll worker:
|
||||
* atomic_set(poll_scheduled, 0)
|
||||
* smp_mb()
|
||||
* LOAD states
|
||||
*
|
||||
* task change:
|
||||
* STORE states
|
||||
* if atomic_xchg(poll_scheduled, 1) == 0:
|
||||
* schedule poll worker
|
||||
*
|
||||
* The atomic_xchg() implies a full barrier.
|
||||
*/
|
||||
smp_mb();
|
||||
} else {
|
||||
/* Polling window is not over, keep rescheduling */
|
||||
force_reschedule = true;
|
||||
}
|
||||
|
||||
|
||||
collect_percpu_times(group, PSI_POLL, &changed_states);
|
||||
|
||||
if (changed_states & group->poll_states) {
|
||||
@@ -624,7 +690,8 @@ static void psi_poll_work(struct psi_group *group)
|
||||
group->polling_next_update = update_triggers(group, now);
|
||||
|
||||
psi_schedule_poll_work(group,
|
||||
nsecs_to_jiffies(group->polling_next_update - now) + 1);
|
||||
nsecs_to_jiffies(group->polling_next_update - now) + 1,
|
||||
force_reschedule);
|
||||
|
||||
out:
|
||||
mutex_unlock(&group->trigger_lock);
|
||||
@@ -785,7 +852,7 @@ static void psi_group_change(struct psi_group *group, int cpu,
|
||||
write_seqcount_end(&groupc->seq);
|
||||
|
||||
if (state_mask & group->poll_states)
|
||||
psi_schedule_poll_work(group, 1);
|
||||
psi_schedule_poll_work(group, 1, false);
|
||||
|
||||
if (wake_clock && !delayed_work_pending(&group->avgs_work))
|
||||
schedule_delayed_work(&group->avgs_work, PSI_FREQ);
|
||||
@@ -939,7 +1006,7 @@ void psi_account_irqtime(struct task_struct *task, u32 delta)
|
||||
write_seqcount_end(&groupc->seq);
|
||||
|
||||
if (group->poll_states & (1 << PSI_IRQ_FULL))
|
||||
psi_schedule_poll_work(group, 1);
|
||||
psi_schedule_poll_work(group, 1, false);
|
||||
} while ((group = group->parent));
|
||||
}
|
||||
#endif
|
||||
@@ -1325,6 +1392,7 @@ void psi_trigger_destroy(struct psi_trigger *t)
|
||||
* can no longer be found through group->poll_task.
|
||||
*/
|
||||
kthread_stop(task_to_destroy);
|
||||
atomic_set(&group->poll_scheduled, 0);
|
||||
}
|
||||
kfree(t);
|
||||
}
|
||||
|
||||
+85
-7
@@ -1041,6 +1041,7 @@ struct rq {
|
||||
|
||||
unsigned long cpu_capacity;
|
||||
unsigned long cpu_capacity_orig;
|
||||
unsigned long cpu_capacity_inverted;
|
||||
|
||||
struct balance_callback *balance_callback;
|
||||
|
||||
@@ -1150,6 +1151,9 @@ struct rq {
|
||||
unsigned int core_forceidle_occupation;
|
||||
u64 core_forceidle_start;
|
||||
#endif
|
||||
|
||||
/* Scratch cpumask to be temporarily used under rq_lock */
|
||||
cpumask_var_t scratch_mask;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
@@ -1877,6 +1881,13 @@ static inline void dirty_sched_domain_sysctl(int cpu)
|
||||
#endif
|
||||
|
||||
extern int sched_update_scaling(void);
|
||||
|
||||
static inline const struct cpumask *task_user_cpus(struct task_struct *p)
|
||||
{
|
||||
if (!p->user_cpus_ptr)
|
||||
return cpu_possible_mask; /* &init_task.cpus_mask */
|
||||
return p->user_cpus_ptr;
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
#include "stats.h"
|
||||
@@ -2144,6 +2155,12 @@ extern const u32 sched_prio_to_wmult[40];
|
||||
|
||||
#define RETRY_TASK ((void *)-1UL)
|
||||
|
||||
struct affinity_context {
|
||||
const struct cpumask *new_mask;
|
||||
struct cpumask *user_mask;
|
||||
unsigned int flags;
|
||||
};
|
||||
|
||||
struct sched_class {
|
||||
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
@@ -2172,9 +2189,7 @@ struct sched_class {
|
||||
|
||||
void (*task_woken)(struct rq *this_rq, struct task_struct *task);
|
||||
|
||||
void (*set_cpus_allowed)(struct task_struct *p,
|
||||
const struct cpumask *newmask,
|
||||
u32 flags);
|
||||
void (*set_cpus_allowed)(struct task_struct *p, struct affinity_context *ctx);
|
||||
|
||||
void (*rq_online)(struct rq *rq);
|
||||
void (*rq_offline)(struct rq *rq);
|
||||
@@ -2285,7 +2300,7 @@ extern void update_group_capacity(struct sched_domain *sd, int cpu);
|
||||
|
||||
extern void trigger_load_balance(struct rq *rq);
|
||||
|
||||
extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask, u32 flags);
|
||||
extern void set_cpus_allowed_common(struct task_struct *p, struct affinity_context *ctx);
|
||||
|
||||
static inline struct task_struct *get_push_task(struct rq *rq)
|
||||
{
|
||||
@@ -2878,6 +2893,24 @@ static inline unsigned long capacity_orig_of(int cpu)
|
||||
return cpu_rq(cpu)->cpu_capacity_orig;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns inverted capacity if the CPU is in capacity inversion state.
|
||||
* 0 otherwise.
|
||||
*
|
||||
* Capacity inversion detection only considers thermal impact where actual
|
||||
* performance points (OPPs) gets dropped.
|
||||
*
|
||||
* Capacity inversion state happens when another performance domain that has
|
||||
* equal or lower capacity_orig_of() becomes effectively larger than the perf
|
||||
* domain this CPU belongs to due to thermal pressure throttling it hard.
|
||||
*
|
||||
* See comment in update_cpu_capacity().
|
||||
*/
|
||||
static inline unsigned long cpu_in_capacity_inversion(int cpu)
|
||||
{
|
||||
return cpu_rq(cpu)->cpu_capacity_inverted;
|
||||
}
|
||||
|
||||
/**
|
||||
* enum cpu_util_type - CPU utilization type
|
||||
* @FREQUENCY_UTIL: Utilization used to select frequency
|
||||
@@ -2979,6 +3012,23 @@ static inline unsigned long cpu_util_rt(struct rq *rq)
|
||||
#ifdef CONFIG_UCLAMP_TASK
|
||||
unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
|
||||
|
||||
static inline unsigned long uclamp_rq_get(struct rq *rq,
|
||||
enum uclamp_id clamp_id)
|
||||
{
|
||||
return READ_ONCE(rq->uclamp[clamp_id].value);
|
||||
}
|
||||
|
||||
static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id,
|
||||
unsigned int value)
|
||||
{
|
||||
WRITE_ONCE(rq->uclamp[clamp_id].value, value);
|
||||
}
|
||||
|
||||
static inline bool uclamp_rq_is_idle(struct rq *rq)
|
||||
{
|
||||
return rq->uclamp_flags & UCLAMP_FLAG_IDLE;
|
||||
}
|
||||
|
||||
/**
|
||||
* uclamp_rq_util_with - clamp @util with @rq and @p effective uclamp values.
|
||||
* @rq: The rq to clamp against. Must not be NULL.
|
||||
@@ -3014,12 +3064,12 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
|
||||
* Ignore last runnable task's max clamp, as this task will
|
||||
* reset it. Similarly, no need to read the rq's min clamp.
|
||||
*/
|
||||
if (rq->uclamp_flags & UCLAMP_FLAG_IDLE)
|
||||
if (uclamp_rq_is_idle(rq))
|
||||
goto out;
|
||||
}
|
||||
|
||||
min_util = max_t(unsigned long, min_util, READ_ONCE(rq->uclamp[UCLAMP_MIN].value));
|
||||
max_util = max_t(unsigned long, max_util, READ_ONCE(rq->uclamp[UCLAMP_MAX].value));
|
||||
min_util = max_t(unsigned long, min_util, uclamp_rq_get(rq, UCLAMP_MIN));
|
||||
max_util = max_t(unsigned long, max_util, uclamp_rq_get(rq, UCLAMP_MAX));
|
||||
out:
|
||||
/*
|
||||
* Since CPU's {min,max}_util clamps are MAX aggregated considering
|
||||
@@ -3060,6 +3110,15 @@ static inline bool uclamp_is_used(void)
|
||||
return static_branch_likely(&sched_uclamp_used);
|
||||
}
|
||||
#else /* CONFIG_UCLAMP_TASK */
|
||||
static inline unsigned long uclamp_eff_value(struct task_struct *p,
|
||||
enum uclamp_id clamp_id)
|
||||
{
|
||||
if (clamp_id == UCLAMP_MIN)
|
||||
return 0;
|
||||
|
||||
return SCHED_CAPACITY_SCALE;
|
||||
}
|
||||
|
||||
static inline
|
||||
unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util,
|
||||
struct task_struct *p)
|
||||
@@ -3073,6 +3132,25 @@ static inline bool uclamp_is_used(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline unsigned long uclamp_rq_get(struct rq *rq,
|
||||
enum uclamp_id clamp_id)
|
||||
{
|
||||
if (clamp_id == UCLAMP_MIN)
|
||||
return 0;
|
||||
|
||||
return SCHED_CAPACITY_SCALE;
|
||||
}
|
||||
|
||||
static inline void uclamp_rq_set(struct rq *rq, enum uclamp_id clamp_id,
|
||||
unsigned int value)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool uclamp_rq_is_idle(struct rq *rq)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif /* CONFIG_UCLAMP_TASK */
|
||||
|
||||
#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
|
||||
|
||||
+4
-18
@@ -128,11 +128,9 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup)
|
||||
if (p->in_memstall)
|
||||
set |= TSK_MEMSTALL_RUNNING;
|
||||
|
||||
if (!wakeup || p->sched_psi_wake_requeue) {
|
||||
if (!wakeup) {
|
||||
if (p->in_memstall)
|
||||
set |= TSK_MEMSTALL;
|
||||
if (p->sched_psi_wake_requeue)
|
||||
p->sched_psi_wake_requeue = 0;
|
||||
} else {
|
||||
if (p->in_iowait)
|
||||
clear |= TSK_IOWAIT;
|
||||
@@ -143,8 +141,6 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup)
|
||||
|
||||
static inline void psi_dequeue(struct task_struct *p, bool sleep)
|
||||
{
|
||||
int clear = TSK_RUNNING;
|
||||
|
||||
if (static_branch_likely(&psi_disabled))
|
||||
return;
|
||||
|
||||
@@ -157,10 +153,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
|
||||
if (sleep)
|
||||
return;
|
||||
|
||||
if (p->in_memstall)
|
||||
clear |= (TSK_MEMSTALL | TSK_MEMSTALL_RUNNING);
|
||||
|
||||
psi_task_change(p, clear, 0);
|
||||
psi_task_change(p, p->psi_flags, 0);
|
||||
}
|
||||
|
||||
static inline void psi_ttwu_dequeue(struct task_struct *p)
|
||||
@@ -172,19 +165,12 @@ static inline void psi_ttwu_dequeue(struct task_struct *p)
|
||||
* deregister its sleep-persistent psi states from the old
|
||||
* queue, and let psi_enqueue() know it has to requeue.
|
||||
*/
|
||||
if (unlikely(p->in_iowait || p->in_memstall)) {
|
||||
if (unlikely(p->psi_flags)) {
|
||||
struct rq_flags rf;
|
||||
struct rq *rq;
|
||||
int clear = 0;
|
||||
|
||||
if (p->in_iowait)
|
||||
clear |= TSK_IOWAIT;
|
||||
if (p->in_memstall)
|
||||
clear |= TSK_MEMSTALL;
|
||||
|
||||
rq = __task_rq_lock(p, &rf);
|
||||
psi_task_change(p, clear, 0);
|
||||
p->sched_psi_wake_requeue = 1;
|
||||
psi_task_change(p, p->psi_flags, 0);
|
||||
__task_rq_unlock(rq, &rf);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user