diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 6cbb6cb136e9..27c214317942 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -593,6 +593,11 @@ enum
 #define SOFTIRQ_HOTPLUG_SAFE_MASK (BIT(TIMER_SOFTIRQ) | BIT(IRQ_POLL_SOFTIRQ) |\
 				   BIT(HRTIMER_SOFTIRQ) | BIT(RCU_SOFTIRQ))
 
+/* Softirqs whose handling might take a long time: */
+#define LONG_SOFTIRQ_MASK (BIT(NET_TX_SOFTIRQ) | \
+			   BIT(NET_RX_SOFTIRQ) | \
+			   BIT(BLOCK_SOFTIRQ)  | \
+			   BIT(IRQ_POLL_SOFTIRQ))
 
 /* map softirq index to softirq name. update 'softirq_to_name' in
  * kernel/softirq.c when adding a new softirq.
@@ -629,6 +634,10 @@ extern void raise_softirq(unsigned int nr);
 
 DECLARE_PER_CPU(struct task_struct *, ksoftirqd);
 
+#ifdef CONFIG_RT_SOFTIRQ_AWARE_SCHED
+DECLARE_PER_CPU(u32, active_softirqs);
+#endif
+
 static inline struct task_struct *this_cpu_ksoftirqd(void)
 {
 	return this_cpu_read(ksoftirqd);
diff --git a/init/Kconfig b/init/Kconfig
index bdbcfda1d22e..ee5334acb06a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1344,6 +1344,16 @@ config SCHED_AUTOGROUP
 	  desktop applications.  Task group autogeneration is currently based
 	  upon task session.
 
+config RT_SOFTIRQ_AWARE_SCHED
+	bool "Improve RT scheduling during long softirq execution"
+	depends on SMP && !PREEMPT_RT
+	default n
+	help
+	  Enable an optimization which tries to avoid placing RT tasks on CPUs
+	  occupied by nonpreemptible work, such as a long softirq, or on CPUs
+	  which may soon block preemption because they have slow softirqs
+	  pending.
+
 config RELAY
 	bool "Kernel->user space relay support (formerly relayfs)"
 	select IRQ_WORK
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 77ae531b37ff..d04dbc34cb22 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1541,6 +1541,32 @@ static void yield_task_rt(struct rq *rq)
 #ifdef CONFIG_SMP
 static int find_lowest_rq(struct task_struct *task);
 
+#ifdef CONFIG_RT_SOFTIRQ_AWARE_SCHED
+/*
+ * Return whether the given cpu is currently non-preemptible
+ * while handling a potentially long softirq, or is likely to
+ * block preemption soon because it has such a softirq raised
+ * and still pending.
+ */
+static bool cpu_busy_with_softirqs(int cpu)
+{
+	u32 softirqs = per_cpu(active_softirqs, cpu) |
+		       __cpu_softirq_pending(cpu);
+
+	return softirqs & LONG_SOFTIRQ_MASK;
+}
+#else
+static bool cpu_busy_with_softirqs(int cpu)
+{
+	return false;
+}
+#endif /* CONFIG_RT_SOFTIRQ_AWARE_SCHED */
+
+static bool rt_task_fits_cpu(struct task_struct *p, int cpu)
+{
+	return rt_task_fits_capacity(p, cpu) && !cpu_busy_with_softirqs(cpu);
+}
+
 static int
 select_task_rq_rt(struct task_struct *p, int cpu, int flags)
 {
@@ -1585,22 +1611,24 @@ select_task_rq_rt(struct task_struct *p, int cpu, int flags)
 	 * This test is optimistic, if we get it wrong the load-balancer
 	 * will have to sort it out.
 	 *
-	 * We take into account the capacity of the CPU to ensure it fits the
-	 * requirement of the task - which is only important on heterogeneous
-	 * systems like big.LITTLE.
+	 * We use rt_task_fits_cpu() to evaluate if the CPU is busy with
+	 * potentially long-running softirq work, as well as take into
+	 * account the capacity of the CPU to ensure it fits the
+	 * requirement of the task - which is only important on
+	 * heterogeneous systems like big.LITTLE.
 	 */
 	test = curr &&
 	       unlikely(rt_task(curr)) &&
 	       (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);
 
-	if (test || !rt_task_fits_capacity(p, cpu)) {
+	if (test || !rt_task_fits_cpu(p, cpu)) {
 		int target = find_lowest_rq(p);
 
 		/*
 		 * Bail out if we were forcing a migration to find a better
 		 * fitting CPU but our search failed.
 		 */
-		if (!test && target != -1 && !rt_task_fits_capacity(p, target))
+		if (!test && target != -1 && !rt_task_fits_cpu(p, target))
 			goto out_unlock;
 
 		/*
@@ -1840,14 +1868,17 @@ static int find_lowest_rq(struct task_struct *task)
 		return -1; /* No other targets possible */
 
 	/*
-	 * If we're on asym system ensure we consider the different capacities
-	 * of the CPUs when searching for the lowest_mask.
+	 * If we're using the softirq optimization or if we are on an
+	 * asymmetric-capacity system, ensure we consider the softirq
+	 * processing or the different capacities of the CPUs when
+	 * searching for the lowest_mask.
 	 */
-	if (sched_asym_cpucap_active()) {
+	if (IS_ENABLED(CONFIG_RT_SOFTIRQ_AWARE_SCHED) ||
+	    sched_asym_cpucap_active()) {
 
 		ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
 					  task, lowest_mask,
-					  rt_task_fits_capacity);
+					  rt_task_fits_cpu);
 	} else {
 
 		ret = cpupri_find(&task_rq(task)->rd->cpupri,
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 8371b7c4b517..b69e316c1688 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -64,6 +64,21 @@ static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp
 DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
 EXPORT_PER_CPU_SYMBOL_GPL(ksoftirqd);
 
+#ifdef CONFIG_RT_SOFTIRQ_AWARE_SCHED
+/*
+ * active_softirqs -- per cpu, a mask of the softirqs currently being handled.
+ * Other CPUs read it without synchronization, as approximate answers are
+ * acceptable.
+ */
+DEFINE_PER_CPU(u32, active_softirqs);
+static inline void set_active_softirqs(u32 pending)
+{
+	__this_cpu_write(active_softirqs, pending);
+}
+#else /* CONFIG_RT_SOFTIRQ_AWARE_SCHED */
+static inline void set_active_softirqs(u32 pending) {}
+#endif /* CONFIG_RT_SOFTIRQ_AWARE_SCHED */
+
 const char * const softirq_to_name[NR_SOFTIRQS] = {
 	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "IRQ_POLL",
 	"TASKLET", "SCHED", "HRTIMER", "RCU"
@@ -537,6 +552,7 @@ static void handle_softirqs(bool ksirqd)
 restart:
 	/* Reset the pending bitmask before enabling irqs */
 	set_softirq_pending(0);
+	set_active_softirqs(pending);
 
 	local_irq_enable();
 
@@ -566,6 +582,7 @@ restart:
 		pending >>= softirq_bit;
 	}
 
+	set_active_softirqs(0);
 	if (!IS_ENABLED(CONFIG_PREEMPT_RT) && ksirqd)
 		rcu_softirq_qs();
 