ANDROID: sched: Push execution and scheduler context split into deadline and rt

In preparation for chain migration, push the awareness of the split
between execution and scheduler context down into some of the
rt/deadline code paths that deal with load balancing.

This patch was broken out from a larger chain migration patch
originally by Connor O'Brien.

Cc: Joel Fernandes <joelaf@google.com>
Cc: Qais Yousef <qyousef@layalina.io>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Zimuzo Ezeozue <zezeozue@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Will Deacon <will@kernel.org>
Cc: Waiman Long <longman@redhat.com>
Cc: Boqun Feng <boqun.feng@gmail.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Metin Kaya <Metin.Kaya@arm.com>
Cc: Xuewen Yan <xuewen.yan94@gmail.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: kernel-team@android.com
Change-Id: Ie71af403e471c201f4eb8a65fe422e738b51d61b
Signed-off-by: Connor O'Brien <connoro@google.com>
[jstultz: split out from larger chain migration patch]
Signed-off-by: John Stultz <jstultz@google.com>
Bug: 306081722
---
v8:
* Move null exec_ctx check to a later patch, as suggested by Metin
v9:
* Folded down cpupri null check changes from later patch, as
  suggested by Metin
2023-11-08 06:40:51 +00:00
parent 6d7a9ac0d5
commit eb7630f95f
6 changed files with 59 additions and 44 deletions
@@ -113,13 +113,13 @@ static inline int cpudl_maximum(struct cpudl *cp)
 *
 * Returns: int - CPUs were found
 */
-int cpudl_find(struct cpudl *cp, struct task_struct *p,
+int cpudl_find(struct cpudl *cp, struct task_struct *sched_ctx, struct task_struct *exec_ctx,
 	       struct cpumask *later_mask)
 {
-	const struct sched_dl_entity *dl_se = &p->dl;
+	const struct sched_dl_entity *dl_se = &sched_ctx->dl;

 	if (later_mask &&
-	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_mask)) {
+	    cpumask_and(later_mask, cp->free_cpus, &exec_ctx->cpus_mask)) {
 		unsigned long cap, max_cap = 0;
 		int cpu, max_cpu = -1;

@@ -128,13 +128,13 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,

 		/* Ensure the capacity of the CPUs fits the task. */
 		for_each_cpu(cpu, later_mask) {
-			if (!dl_task_fits_capacity(p, cpu)) {
+			if (!dl_task_fits_capacity(sched_ctx, cpu)) {
 				cpumask_clear_cpu(cpu, later_mask);

 				cap = arch_scale_cpu_capacity(cpu);

 				if (cap > max_cap ||
-				    (cpu == task_cpu(p) && cap == max_cap)) {
+				    (cpu == task_cpu(exec_ctx) && cap == max_cap)) {
 					max_cap = cap;
 					max_cpu = cpu;
 				}
@@ -150,7 +150,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,

 		WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));

-		if (cpumask_test_cpu(best_cpu, &p->cpus_mask) &&
+		if (cpumask_test_cpu(best_cpu, &exec_ctx->cpus_mask) &&
 		    dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
 			if (later_mask)
 				cpumask_set_cpu(best_cpu, later_mask);
@@ -16,7 +16,8 @@ struct cpudl {
 };

 #ifdef CONFIG_SMP
-int  cpudl_find(struct cpudl *cp, struct task_struct *p, struct cpumask *later_mask);
+int  cpudl_find(struct cpudl *cp, struct task_struct *sched_ctx,
+		struct task_struct *exec_ctx, struct cpumask *later_mask);
 void cpudl_set(struct cpudl *cp, int cpu, u64 dl);
 void cpudl_clear(struct cpudl *cp, int cpu);
 int  cpudl_init(struct cpudl *cp);
@@ -96,12 +96,17 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
 	if (skip)
 		return 0;

-	if (cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids)
+	if ((p && cpumask_any_and(&p->cpus_mask, vec->mask) >= nr_cpu_ids) ||
+	    (!p && cpumask_any(vec->mask) >= nr_cpu_ids))
 		return 0;

 	if (lowest_mask) {
-		cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
-		cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
+		if (p) {
+			cpumask_and(lowest_mask, &p->cpus_mask, vec->mask);
+			cpumask_and(lowest_mask, lowest_mask, cpu_active_mask);
+		} else {
+			cpumask_copy(lowest_mask, vec->mask);
+		}

 		/*
 		 * We have to ensure that we have at least one bit
@@ -118,10 +123,11 @@ static inline int __cpupri_find(struct cpupri *cp, struct task_struct *p,
 	return 1;
 }

-int cpupri_find(struct cpupri *cp, struct task_struct *p,
+int cpupri_find(struct cpupri *cp, struct task_struct *sched_ctx,
+		struct task_struct *exec_ctx,
 		struct cpumask *lowest_mask)
 {
-	return cpupri_find_fitness(cp, p, lowest_mask, NULL);
+	return cpupri_find_fitness(cp, sched_ctx, exec_ctx, lowest_mask, NULL);
 }

 /**
@@ -141,18 +147,19 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 *
 * Return: (int)bool - CPUs were found
 */
-int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p,
-		struct cpumask *lowest_mask,
-		bool (*fitness_fn)(struct task_struct *p, int cpu))
+int cpupri_find_fitness(struct cpupri *cp, struct task_struct *sched_ctx,
+			struct task_struct *exec_ctx,
+			struct cpumask *lowest_mask,
+			bool (*fitness_fn)(struct task_struct *p, int cpu))
 {
-	int task_pri = convert_prio(p->prio);
+	int task_pri = convert_prio(sched_ctx->prio);
 	int idx, cpu;

 	WARN_ON_ONCE(task_pri >= CPUPRI_NR_PRIORITIES);

 	for (idx = 0; idx < task_pri; idx++) {

-		if (!__cpupri_find(cp, p, lowest_mask, idx))
+		if (!__cpupri_find(cp, exec_ctx, lowest_mask, idx))
 			continue;

 		if (!lowest_mask || !fitness_fn)
@@ -160,7 +167,7 @@ int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p,

 		/* Ensure the capacity of the CPUs fit the task */
 		for_each_cpu(cpu, lowest_mask) {
-			if (!fitness_fn(p, cpu))
+			if (!fitness_fn(sched_ctx, cpu))
 				cpumask_clear_cpu(cpu, lowest_mask);
 		}

@@ -192,7 +199,7 @@ int cpupri_find_fitness(struct cpupri *cp, struct task_struct *p,
 	 * really care.
 	 */
 	if (fitness_fn)
-		return cpupri_find(cp, p, lowest_mask);
+		return cpupri_find(cp, sched_ctx, exec_ctx, lowest_mask);

 	return 0;
 }
@@ -18,9 +18,11 @@ struct cpupri {
 };

 #ifdef CONFIG_SMP
-int  cpupri_find(struct cpupri *cp, struct task_struct *p,
+int  cpupri_find(struct cpupri *cp, struct task_struct *sched_ctx,
+		 struct task_struct *exec_ctx,
 		 struct cpumask *lowest_mask);
-int  cpupri_find_fitness(struct cpupri *cp, struct task_struct *p,
+int  cpupri_find_fitness(struct cpupri *cp, struct task_struct *sched_ctx,
+			 struct task_struct *exec_ctx,
 			 struct cpumask *lowest_mask,
 			 bool (*fitness_fn)(struct task_struct *p, int cpu));
 void cpupri_set(struct cpupri *cp, int cpu, int pri);
@@ -2211,7 +2211,7 @@ static inline bool dl_task_is_earliest_deadline(struct task_struct *p,
 			       rq->dl.earliest_dl.curr));
 }

-static int find_later_rq(struct task_struct *task);
+static int find_later_rq(struct task_struct *sched_ctx, struct task_struct *exec_ctx);

 static int
 select_task_rq_dl(struct task_struct *p, int cpu, int flags)
@@ -2251,7 +2251,7 @@ select_task_rq_dl(struct task_struct *p, int cpu, int flags)
 		select_rq |= !dl_task_fits_capacity(p, cpu);

 	if (select_rq) {
-		int target = find_later_rq(p);
+		int target = find_later_rq(p, p);

 		if (target != -1 &&
 		    dl_task_is_earliest_deadline(p, cpu_rq(target)))
@@ -2303,7 +2303,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 	 * let's hope p can move out.
 	 */
 	if (rq->curr->nr_cpus_allowed == 1 ||
-	    !cpudl_find(&rq->rd->cpudl, rq->donor, NULL))
+	    !cpudl_find(&rq->rd->cpudl, rq->donor, rq->curr, NULL))
 		return;

 	/*
@@ -2311,7 +2311,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 	 * see if it is pushed or pulled somewhere else.
 	 */
 	if (p->nr_cpus_allowed != 1 &&
-	    cpudl_find(&rq->rd->cpudl, p, NULL))
+	    cpudl_find(&rq->rd->cpudl, p, p, NULL))
 		return;

 	resched_curr(rq);
@@ -2521,25 +2521,25 @@ next_node:

 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);

-static int find_later_rq(struct task_struct *task)
+static int find_later_rq(struct task_struct *sched_ctx, struct task_struct *exec_ctx)
 {
 	struct sched_domain *sd;
 	struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
 	int this_cpu = smp_processor_id();
-	int cpu = task_cpu(task);
+	int cpu = task_cpu(sched_ctx);

 	/* Make sure the mask is initialized first */
 	if (unlikely(!later_mask))
 		return -1;

-	if (task->nr_cpus_allowed == 1)
+	if (exec_ctx->nr_cpus_allowed == 1)
 		return -1;

 	/*
 	 * We have to consider system topology and task affinity
 	 * first, then we can look for a suitable CPU.
 	 */
-	if (!cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask))
+	if (!cpudl_find(&task_rq(exec_ctx)->rd->cpudl, sched_ctx, exec_ctx, later_mask))
 		return -1;

 	/*
@@ -2616,7 +2616,7 @@ static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
 	int cpu;

 	for (tries = 0; tries < DL_MAX_TRIES; tries++) {
-		cpu = find_later_rq(task);
+		cpu = find_later_rq(task, task);

 		if ((cpu == -1) || (cpu == rq->cpu))
 			break;
@@ -1579,7 +1579,7 @@ static void yield_task_rt(struct rq *rq)
 }

 #ifdef CONFIG_SMP
-static int find_lowest_rq(struct task_struct *task);
+static int find_lowest_rq(struct task_struct *sched_ctx, struct task_struct *exec_ctx);

 #ifdef CONFIG_RT_SOFTIRQ_AWARE_SCHED
 /*
@@ -1673,7 +1673,7 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags)
 	}

 	if (test || !rt_task_fits_cpu(p, cpu)) {
-		int target = find_lowest_rq(p);
+		int target = find_lowest_rq(p, p);

 		/*
 		 * Bail out if we were forcing a migration to find a better
@@ -1700,8 +1700,13 @@ out:

 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 {
+	struct task_struct *exec_ctx = p;
+	/*
+	 * Current can't be migrated, useless to reschedule,
+	 * let's hope p can move out.
+	 */
 	if (rq->curr->nr_cpus_allowed == 1 ||
-	    !cpupri_find(&rq->rd->cpupri, rq->donor, NULL))
+	    !cpupri_find(&rq->rd->cpupri, rq->donor, rq->curr, NULL))
 		return;

 	/*
@@ -1709,7 +1714,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 	 * see if it is pushed or pulled somewhere else.
 	 */
 	if (p->nr_cpus_allowed != 1 &&
-	    cpupri_find(&rq->rd->cpupri, p, NULL))
+	    cpupri_find(&rq->rd->cpupri, p, exec_ctx, NULL))
 		return;

 	/*
@@ -1894,7 +1899,7 @@ EXPORT_SYMBOL_GPL(pick_highest_pushable_task);

 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

-static int find_lowest_rq(struct task_struct *task)
+static int find_lowest_rq(struct task_struct *sched_ctx, struct task_struct *exec_ctx)
 {
 	struct sched_domain *sd;
 	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
@@ -1906,7 +1911,7 @@ static int find_lowest_rq(struct task_struct *task)
 	if (unlikely(!lowest_mask))
 		return -1;

-	if (task->nr_cpus_allowed == 1)
+	if (exec_ctx->nr_cpus_allowed == 1)
 		return -1; /* No other targets possible */

 	/*
@@ -1918,23 +1923,23 @@ static int find_lowest_rq(struct task_struct *task)
 	if (IS_ENABLED(CONFIG_RT_SOFTIRQ_AWARE_SCHED) ||
 	    sched_asym_cpucap_active()) {

-		ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
-					  task, lowest_mask,
+		ret = cpupri_find_fitness(&task_rq(sched_ctx)->rd->cpupri,
+					  sched_ctx, exec_ctx, lowest_mask,
 					  rt_task_fits_cpu);
 	} else {

-		ret = cpupri_find(&task_rq(task)->rd->cpupri,
-				  task, lowest_mask);
+		ret = cpupri_find(&task_rq(sched_ctx)->rd->cpupri,
+				  sched_ctx, exec_ctx, lowest_mask);
 	}

-	trace_android_rvh_find_lowest_rq(task, lowest_mask, ret, &cpu);
+	trace_android_rvh_find_lowest_rq(sched_ctx, lowest_mask, ret, &cpu);
 	if (cpu >= 0)
 		return cpu;

 	if (!ret)
 		return -1; /* No targets found */

-	cpu = task_cpu(task);
+	cpu = task_cpu(sched_ctx);

 	/*
 	 * At this point we have built a mask of CPUs representing the
@@ -2002,7 +2007,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 	int cpu;

 	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
-		cpu = find_lowest_rq(task);
+		cpu = find_lowest_rq(task, task);

 		if ((cpu == -1) || (cpu == rq->cpu))
 			break;
@@ -2124,7 +2129,7 @@ retry:
 		if (rq->donor->sched_class != &rt_sched_class)
 			return 0;

-		cpu = find_lowest_rq(rq->curr);
+		cpu = find_lowest_rq(rq->donor, rq->curr);
 		if (cpu == -1 || cpu == rq->cpu)
 			return 0;