Merge branches 'sched/devel', 'sched/cpu-hotplug', 'sched/cpusets' and 'sched/urgent' into sched/core
This commit is contained in:
+22
-2
@@ -199,13 +199,14 @@ static int __ref take_cpu_down(void *_param)
|
||||
struct take_cpu_down_param *param = _param;
|
||||
int err;
|
||||
|
||||
raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
|
||||
param->hcpu);
|
||||
/* Ensure this CPU doesn't handle any more interrupts. */
|
||||
err = __cpu_disable();
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
|
||||
param->hcpu);
|
||||
|
||||
/* Force idle task to run as soon as we yield: it should
|
||||
immediately notice cpu is offline and die quickly. */
|
||||
sched_idle_next();
|
||||
@@ -453,6 +454,25 @@ out:
|
||||
}
|
||||
#endif /* CONFIG_PM_SLEEP_SMP */
|
||||
|
||||
/**
|
||||
* notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
|
||||
* @cpu: cpu that just started
|
||||
*
|
||||
* This function calls the cpu_chain notifiers with CPU_STARTING.
|
||||
* It must be called by the arch code on the new cpu, before the new cpu
|
||||
* enables interrupts and before the "boot" cpu returns from __cpu_up().
|
||||
*/
|
||||
void notify_cpu_starting(unsigned int cpu)
|
||||
{
|
||||
unsigned long val = CPU_STARTING;
|
||||
|
||||
#ifdef CONFIG_PM_SLEEP_SMP
|
||||
if (cpu_isset(cpu, frozen_cpus))
|
||||
val = CPU_STARTING_FROZEN;
|
||||
#endif /* CONFIG_PM_SLEEP_SMP */
|
||||
raw_notifier_call_chain(&cpu_chain, val, (void *)(long)cpu);
|
||||
}
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
|
||||
/*
|
||||
|
||||
+1
-1
@@ -1921,7 +1921,7 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
|
||||
* that has tasks along with an empty 'mems'. But if we did see such
|
||||
* a cpuset, we'd handle it just like we do if its 'cpus' was empty.
|
||||
*/
|
||||
static void scan_for_empty_cpusets(const struct cpuset *root)
|
||||
static void scan_for_empty_cpusets(struct cpuset *root)
|
||||
{
|
||||
LIST_HEAD(queue);
|
||||
struct cpuset *cp; /* scans cpusets being updated */
|
||||
|
||||
+243
-150
@@ -204,11 +204,16 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
|
||||
rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_UNLOCKED;
|
||||
}
|
||||
|
||||
static inline int rt_bandwidth_enabled(void)
|
||||
{
|
||||
return sysctl_sched_rt_runtime >= 0;
|
||||
}
|
||||
|
||||
static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
|
||||
{
|
||||
ktime_t now;
|
||||
|
||||
if (rt_b->rt_runtime == RUNTIME_INF)
|
||||
if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
|
||||
return;
|
||||
|
||||
if (hrtimer_active(&rt_b->rt_period_timer))
|
||||
@@ -298,9 +303,9 @@ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
|
||||
static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
|
||||
static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
|
||||
#endif /* CONFIG_RT_GROUP_SCHED */
|
||||
#else /* !CONFIG_FAIR_GROUP_SCHED */
|
||||
#else /* !CONFIG_USER_SCHED */
|
||||
#define root_task_group init_task_group
|
||||
#endif /* CONFIG_FAIR_GROUP_SCHED */
|
||||
#endif /* CONFIG_USER_SCHED */
|
||||
|
||||
/* task_group_lock serializes add/remove of task groups and also changes to
|
||||
* a task group's cpu shares.
|
||||
@@ -604,9 +609,9 @@ struct rq {
|
||||
|
||||
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
|
||||
|
||||
static inline void check_preempt_curr(struct rq *rq, struct task_struct *p)
|
||||
static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync)
|
||||
{
|
||||
rq->curr->sched_class->check_preempt_curr(rq, p);
|
||||
rq->curr->sched_class->check_preempt_curr(rq, p, sync);
|
||||
}
|
||||
|
||||
static inline int cpu_of(struct rq *rq)
|
||||
@@ -1102,7 +1107,7 @@ static void hrtick_start(struct rq *rq, u64 delay)
|
||||
hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
|
||||
}
|
||||
|
||||
static void init_hrtick(void)
|
||||
static inline void init_hrtick(void)
|
||||
{
|
||||
}
|
||||
#endif /* CONFIG_SMP */
|
||||
@@ -1121,7 +1126,7 @@ static void init_rq_hrtick(struct rq *rq)
|
||||
rq->hrtick_timer.function = hrtick;
|
||||
rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_PERCPU;
|
||||
}
|
||||
#else
|
||||
#else /* CONFIG_SCHED_HRTICK */
|
||||
static inline void hrtick_clear(struct rq *rq)
|
||||
{
|
||||
}
|
||||
@@ -1133,7 +1138,7 @@ static inline void init_rq_hrtick(struct rq *rq)
|
||||
static inline void init_hrtick(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
#endif /* CONFIG_SCHED_HRTICK */
|
||||
|
||||
/*
|
||||
* resched_task - mark a task 'to be rescheduled now'.
|
||||
@@ -1380,6 +1385,51 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
|
||||
update_load_sub(&rq->load, load);
|
||||
}
|
||||
|
||||
#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
|
||||
typedef int (*tg_visitor)(struct task_group *, void *);
|
||||
|
||||
/*
|
||||
* Iterate the full tree, calling @down when first entering a node and @up when
|
||||
* leaving it for the final time.
|
||||
*/
|
||||
static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
|
||||
{
|
||||
struct task_group *parent, *child;
|
||||
int ret;
|
||||
|
||||
rcu_read_lock();
|
||||
parent = &root_task_group;
|
||||
down:
|
||||
ret = (*down)(parent, data);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
list_for_each_entry_rcu(child, &parent->children, siblings) {
|
||||
parent = child;
|
||||
goto down;
|
||||
|
||||
up:
|
||||
continue;
|
||||
}
|
||||
ret = (*up)(parent, data);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
child = parent;
|
||||
parent = parent->parent;
|
||||
if (parent)
|
||||
goto up;
|
||||
out_unlock:
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int tg_nop(struct task_group *tg, void *data)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
static unsigned long source_load(int cpu, int type);
|
||||
static unsigned long target_load(int cpu, int type);
|
||||
@@ -1397,37 +1447,6 @@ static unsigned long cpu_avg_load_per_task(int cpu)
|
||||
|
||||
#ifdef CONFIG_FAIR_GROUP_SCHED
|
||||
|
||||
typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
|
||||
|
||||
/*
|
||||
* Iterate the full tree, calling @down when first entering a node and @up when
|
||||
* leaving it for the final time.
|
||||
*/
|
||||
static void
|
||||
walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
|
||||
{
|
||||
struct task_group *parent, *child;
|
||||
|
||||
rcu_read_lock();
|
||||
parent = &root_task_group;
|
||||
down:
|
||||
(*down)(parent, cpu, sd);
|
||||
list_for_each_entry_rcu(child, &parent->children, siblings) {
|
||||
parent = child;
|
||||
goto down;
|
||||
|
||||
up:
|
||||
continue;
|
||||
}
|
||||
(*up)(parent, cpu, sd);
|
||||
|
||||
child = parent;
|
||||
parent = parent->parent;
|
||||
if (parent)
|
||||
goto up;
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
static void __set_se_shares(struct sched_entity *se, unsigned long shares);
|
||||
|
||||
/*
|
||||
@@ -1486,11 +1505,11 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
|
||||
* This needs to be done in a bottom-up fashion because the rq weight of a
|
||||
* parent group depends on the shares of its child groups.
|
||||
*/
|
||||
static void
|
||||
tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
|
||||
static int tg_shares_up(struct task_group *tg, void *data)
|
||||
{
|
||||
unsigned long rq_weight = 0;
|
||||
unsigned long shares = 0;
|
||||
struct sched_domain *sd = data;
|
||||
int i;
|
||||
|
||||
for_each_cpu_mask(i, sd->span) {
|
||||
@@ -1515,6 +1534,8 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
|
||||
__update_group_shares_cpu(tg, i, shares, rq_weight);
|
||||
spin_unlock_irqrestore(&rq->lock, flags);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1522,10 +1543,10 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
|
||||
* This needs to be done in a top-down fashion because the load of a child
|
||||
* group is a fraction of its parents load.
|
||||
*/
|
||||
static void
|
||||
tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
|
||||
static int tg_load_down(struct task_group *tg, void *data)
|
||||
{
|
||||
unsigned long load;
|
||||
long cpu = (long)data;
|
||||
|
||||
if (!tg->parent) {
|
||||
load = cpu_rq(cpu)->load.weight;
|
||||
@@ -1536,11 +1557,8 @@ tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
|
||||
}
|
||||
|
||||
tg->cfs_rq[cpu]->h_load = load;
|
||||
}
|
||||
|
||||
static void
|
||||
tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void update_shares(struct sched_domain *sd)
|
||||
@@ -1550,7 +1568,7 @@ static void update_shares(struct sched_domain *sd)
|
||||
|
||||
if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
|
||||
sd->last_update = now;
|
||||
walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
|
||||
walk_tg_tree(tg_nop, tg_shares_up, sd);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1561,9 +1579,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
|
||||
spin_lock(&rq->lock);
|
||||
}
|
||||
|
||||
static void update_h_load(int cpu)
|
||||
static void update_h_load(long cpu)
|
||||
{
|
||||
walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
|
||||
walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -1921,11 +1939,8 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
|
||||
running = task_running(rq, p);
|
||||
on_rq = p->se.on_rq;
|
||||
ncsw = 0;
|
||||
if (!match_state || p->state == match_state) {
|
||||
ncsw = p->nivcsw + p->nvcsw;
|
||||
if (unlikely(!ncsw))
|
||||
ncsw = 1;
|
||||
}
|
||||
if (!match_state || p->state == match_state)
|
||||
ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
|
||||
task_rq_unlock(rq, &flags);
|
||||
|
||||
/*
|
||||
@@ -2285,7 +2300,7 @@ out_running:
|
||||
trace_mark(kernel_sched_wakeup,
|
||||
"pid %d state %ld ## rq %p task %p rq->curr %p",
|
||||
p->pid, p->state, rq, p, rq->curr);
|
||||
check_preempt_curr(rq, p);
|
||||
check_preempt_curr(rq, p, sync);
|
||||
|
||||
p->state = TASK_RUNNING;
|
||||
#ifdef CONFIG_SMP
|
||||
@@ -2420,7 +2435,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
|
||||
trace_mark(kernel_sched_wakeup_new,
|
||||
"pid %d state %ld ## rq %p task %p rq->curr %p",
|
||||
p->pid, p->state, rq, p, rq->curr);
|
||||
check_preempt_curr(rq, p);
|
||||
check_preempt_curr(rq, p, 0);
|
||||
#ifdef CONFIG_SMP
|
||||
if (p->sched_class->task_wake_up)
|
||||
p->sched_class->task_wake_up(rq, p);
|
||||
@@ -2880,7 +2895,7 @@ static void pull_task(struct rq *src_rq, struct task_struct *p,
|
||||
* Note that idle threads have a prio of MAX_PRIO, for this test
|
||||
* to be always true for them.
|
||||
*/
|
||||
check_preempt_curr(this_rq, p);
|
||||
check_preempt_curr(this_rq, p, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -4627,6 +4642,15 @@ __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
|
||||
|
||||
/**
|
||||
* complete: - signals a single thread waiting on this completion
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This will wake up a single thread waiting on this completion. Threads will be
|
||||
* awakened in the same order in which they were queued.
|
||||
*
|
||||
* See also complete_all(), wait_for_completion() and related routines.
|
||||
*/
|
||||
void complete(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
@@ -4638,6 +4662,12 @@ void complete(struct completion *x)
|
||||
}
|
||||
EXPORT_SYMBOL(complete);
|
||||
|
||||
/**
|
||||
* complete_all: - signals all threads waiting on this completion
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This will wake up all threads waiting on this particular completion event.
|
||||
*/
|
||||
void complete_all(struct completion *x)
|
||||
{
|
||||
unsigned long flags;
|
||||
@@ -4658,10 +4688,7 @@ do_wait_for_common(struct completion *x, long timeout, int state)
|
||||
wait.flags |= WQ_FLAG_EXCLUSIVE;
|
||||
__add_wait_queue_tail(&x->wait, &wait);
|
||||
do {
|
||||
if ((state == TASK_INTERRUPTIBLE &&
|
||||
signal_pending(current)) ||
|
||||
(state == TASK_KILLABLE &&
|
||||
fatal_signal_pending(current))) {
|
||||
if (signal_pending_state(state, current)) {
|
||||
timeout = -ERESTARTSYS;
|
||||
break;
|
||||
}
|
||||
@@ -4689,12 +4716,31 @@ wait_for_common(struct completion *x, long timeout, int state)
|
||||
return timeout;
|
||||
}
|
||||
|
||||
/**
|
||||
* wait_for_completion: - waits for completion of a task
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits to be signaled for completion of a specific task. It is NOT
|
||||
* interruptible and there is no timeout.
|
||||
*
|
||||
* See also similar routines (i.e. wait_for_completion_timeout()) with timeout
|
||||
* and interrupt capability. Also see complete().
|
||||
*/
|
||||
void __sched wait_for_completion(struct completion *x)
|
||||
{
|
||||
wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion);
|
||||
|
||||
/**
|
||||
* wait_for_completion_timeout: - waits for completion of a task (w/timeout)
|
||||
* @x: holds the state of this particular completion
|
||||
* @timeout: timeout value in jiffies
|
||||
*
|
||||
* This waits for either a completion of a specific task to be signaled or for a
|
||||
* specified timeout to expire. The timeout is in jiffies. It is not
|
||||
* interruptible.
|
||||
*/
|
||||
unsigned long __sched
|
||||
wait_for_completion_timeout(struct completion *x, unsigned long timeout)
|
||||
{
|
||||
@@ -4702,6 +4748,13 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout)
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_timeout);
|
||||
|
||||
/**
|
||||
* wait_for_completion_interruptible: - waits for completion of a task (w/intr)
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits for completion of a specific task to be signaled. It is
|
||||
* interruptible.
|
||||
*/
|
||||
int __sched wait_for_completion_interruptible(struct completion *x)
|
||||
{
|
||||
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_INTERRUPTIBLE);
|
||||
@@ -4711,6 +4764,14 @@ int __sched wait_for_completion_interruptible(struct completion *x)
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_interruptible);
|
||||
|
||||
/**
|
||||
* wait_for_completion_interruptible_timeout: - waits for completion (w/(to,intr))
|
||||
* @x: holds the state of this particular completion
|
||||
* @timeout: timeout value in jiffies
|
||||
*
|
||||
* This waits for either a completion of a specific task to be signaled or for a
|
||||
* specified timeout to expire. It is interruptible. The timeout is in jiffies.
|
||||
*/
|
||||
unsigned long __sched
|
||||
wait_for_completion_interruptible_timeout(struct completion *x,
|
||||
unsigned long timeout)
|
||||
@@ -4719,6 +4780,13 @@ wait_for_completion_interruptible_timeout(struct completion *x,
|
||||
}
|
||||
EXPORT_SYMBOL(wait_for_completion_interruptible_timeout);
|
||||
|
||||
/**
|
||||
* wait_for_completion_killable: - waits for completion of a task (killable)
|
||||
* @x: holds the state of this particular completion
|
||||
*
|
||||
* This waits to be signaled for completion of a specific task. It can be
|
||||
* interrupted by a kill signal.
|
||||
*/
|
||||
int __sched wait_for_completion_killable(struct completion *x)
|
||||
{
|
||||
long t = wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_KILLABLE);
|
||||
@@ -5121,7 +5189,8 @@ recheck:
|
||||
* Do not allow realtime tasks into groups that have no runtime
|
||||
* assigned.
|
||||
*/
|
||||
if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
|
||||
if (rt_bandwidth_enabled() && rt_policy(policy) &&
|
||||
task_group(p)->rt_bandwidth.rt_runtime == 0)
|
||||
return -EPERM;
|
||||
#endif
|
||||
|
||||
@@ -5957,7 +6026,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
|
||||
set_task_cpu(p, dest_cpu);
|
||||
if (on_rq) {
|
||||
activate_task(rq_dest, p, 0);
|
||||
check_preempt_curr(rq_dest, p);
|
||||
check_preempt_curr(rq_dest, p, 0);
|
||||
}
|
||||
done:
|
||||
ret = 1;
|
||||
@@ -8242,20 +8311,25 @@ void __might_sleep(char *file, int line)
|
||||
#ifdef in_atomic
|
||||
static unsigned long prev_jiffy; /* ratelimiting */
|
||||
|
||||
if ((in_atomic() || irqs_disabled()) &&
|
||||
system_state == SYSTEM_RUNNING && !oops_in_progress) {
|
||||
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
|
||||
return;
|
||||
prev_jiffy = jiffies;
|
||||
printk(KERN_ERR "BUG: sleeping function called from invalid"
|
||||
" context at %s:%d\n", file, line);
|
||||
printk("in_atomic():%d, irqs_disabled():%d\n",
|
||||
in_atomic(), irqs_disabled());
|
||||
debug_show_held_locks(current);
|
||||
if (irqs_disabled())
|
||||
print_irqtrace_events(current);
|
||||
dump_stack();
|
||||
}
|
||||
if ((!in_atomic() && !irqs_disabled()) ||
|
||||
system_state != SYSTEM_RUNNING || oops_in_progress)
|
||||
return;
|
||||
if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
|
||||
return;
|
||||
prev_jiffy = jiffies;
|
||||
|
||||
printk(KERN_ERR
|
||||
"BUG: sleeping function called from invalid context at %s:%d\n",
|
||||
file, line);
|
||||
printk(KERN_ERR
|
||||
"in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
|
||||
in_atomic(), irqs_disabled(),
|
||||
current->pid, current->comm);
|
||||
|
||||
debug_show_held_locks(current);
|
||||
if (irqs_disabled())
|
||||
print_irqtrace_events(current);
|
||||
dump_stack();
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(__might_sleep);
|
||||
@@ -8753,75 +8827,97 @@ static DEFINE_MUTEX(rt_constraints_mutex);
|
||||
static unsigned long to_ratio(u64 period, u64 runtime)
|
||||
{
|
||||
if (runtime == RUNTIME_INF)
|
||||
return 1ULL << 16;
|
||||
return 1ULL << 20;
|
||||
|
||||
return div64_u64(runtime << 16, period);
|
||||
return div64_u64(runtime << 20, period);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUP_SCHED
|
||||
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
|
||||
{
|
||||
struct task_group *tgi, *parent = tg->parent;
|
||||
unsigned long total = 0;
|
||||
|
||||
if (!parent) {
|
||||
if (global_rt_period() < period)
|
||||
return 0;
|
||||
|
||||
return to_ratio(period, runtime) <
|
||||
to_ratio(global_rt_period(), global_rt_runtime());
|
||||
}
|
||||
|
||||
if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
|
||||
return 0;
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(tgi, &parent->children, siblings) {
|
||||
if (tgi == tg)
|
||||
continue;
|
||||
|
||||
total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
|
||||
tgi->rt_bandwidth.rt_runtime);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return total + to_ratio(period, runtime) <=
|
||||
to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
|
||||
parent->rt_bandwidth.rt_runtime);
|
||||
}
|
||||
#elif defined CONFIG_USER_SCHED
|
||||
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
|
||||
{
|
||||
struct task_group *tgi;
|
||||
unsigned long total = 0;
|
||||
unsigned long global_ratio =
|
||||
to_ratio(global_rt_period(), global_rt_runtime());
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(tgi, &task_groups, list) {
|
||||
if (tgi == tg)
|
||||
continue;
|
||||
|
||||
total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
|
||||
tgi->rt_bandwidth.rt_runtime);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
return total + to_ratio(period, runtime) < global_ratio;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Must be called with tasklist_lock held */
|
||||
static inline int tg_has_rt_tasks(struct task_group *tg)
|
||||
{
|
||||
struct task_struct *g, *p;
|
||||
|
||||
do_each_thread(g, p) {
|
||||
if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
|
||||
return 1;
|
||||
} while_each_thread(g, p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct rt_schedulable_data {
|
||||
struct task_group *tg;
|
||||
u64 rt_period;
|
||||
u64 rt_runtime;
|
||||
};
|
||||
|
||||
static int tg_schedulable(struct task_group *tg, void *data)
|
||||
{
|
||||
struct rt_schedulable_data *d = data;
|
||||
struct task_group *child;
|
||||
unsigned long total, sum = 0;
|
||||
u64 period, runtime;
|
||||
|
||||
period = ktime_to_ns(tg->rt_bandwidth.rt_period);
|
||||
runtime = tg->rt_bandwidth.rt_runtime;
|
||||
|
||||
if (tg == d->tg) {
|
||||
period = d->rt_period;
|
||||
runtime = d->rt_runtime;
|
||||
}
|
||||
|
||||
/*
|
||||
* Cannot have more runtime than the period.
|
||||
*/
|
||||
if (runtime > period && runtime != RUNTIME_INF)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Ensure we don't starve existing RT tasks.
|
||||
*/
|
||||
if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
|
||||
return -EBUSY;
|
||||
|
||||
total = to_ratio(period, runtime);
|
||||
|
||||
/*
|
||||
* Nobody can have more than the global setting allows.
|
||||
*/
|
||||
if (total > to_ratio(global_rt_period(), global_rt_runtime()))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* The sum of our children's runtime should not exceed our own.
|
||||
*/
|
||||
list_for_each_entry_rcu(child, &tg->children, siblings) {
|
||||
period = ktime_to_ns(child->rt_bandwidth.rt_period);
|
||||
runtime = child->rt_bandwidth.rt_runtime;
|
||||
|
||||
if (child == d->tg) {
|
||||
period = d->rt_period;
|
||||
runtime = d->rt_runtime;
|
||||
}
|
||||
|
||||
sum += to_ratio(period, runtime);
|
||||
}
|
||||
|
||||
if (sum > total)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
|
||||
{
|
||||
struct rt_schedulable_data data = {
|
||||
.tg = tg,
|
||||
.rt_period = period,
|
||||
.rt_runtime = runtime,
|
||||
};
|
||||
|
||||
return walk_tg_tree(tg_schedulable, tg_nop, &data);
|
||||
}
|
||||
|
||||
static int tg_set_bandwidth(struct task_group *tg,
|
||||
u64 rt_period, u64 rt_runtime)
|
||||
{
|
||||
@@ -8829,14 +8925,9 @@ static int tg_set_bandwidth(struct task_group *tg,
|
||||
|
||||
mutex_lock(&rt_constraints_mutex);
|
||||
read_lock(&tasklist_lock);
|
||||
if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
|
||||
err = -EBUSY;
|
||||
err = __rt_schedulable(tg, rt_period, rt_runtime);
|
||||
if (err)
|
||||
goto unlock;
|
||||
}
|
||||
if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
|
||||
err = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
|
||||
tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
|
||||
@@ -8905,19 +8996,25 @@ long sched_group_rt_period(struct task_group *tg)
|
||||
|
||||
static int sched_rt_global_constraints(void)
|
||||
{
|
||||
struct task_group *tg = &root_task_group;
|
||||
u64 rt_runtime, rt_period;
|
||||
u64 runtime, period;
|
||||
int ret = 0;
|
||||
|
||||
if (sysctl_sched_rt_period <= 0)
|
||||
return -EINVAL;
|
||||
|
||||
rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
|
||||
rt_runtime = tg->rt_bandwidth.rt_runtime;
|
||||
runtime = global_rt_runtime();
|
||||
period = global_rt_period();
|
||||
|
||||
/*
|
||||
* Sanity check on the sysctl variables.
|
||||
*/
|
||||
if (runtime > period && runtime != RUNTIME_INF)
|
||||
return -EINVAL;
|
||||
|
||||
mutex_lock(&rt_constraints_mutex);
|
||||
if (!__rt_schedulable(tg, rt_period, rt_runtime))
|
||||
ret = -EINVAL;
|
||||
read_lock(&tasklist_lock);
|
||||
ret = __rt_schedulable(NULL, 0, 0);
|
||||
read_unlock(&tasklist_lock);
|
||||
mutex_unlock(&rt_constraints_mutex);
|
||||
|
||||
return ret;
|
||||
@@ -8991,7 +9088,6 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
||||
|
||||
if (!cgrp->parent) {
|
||||
/* This is early initialization for the top cgroup */
|
||||
init_task_group.css.cgroup = cgrp;
|
||||
return &init_task_group.css;
|
||||
}
|
||||
|
||||
@@ -9000,9 +9096,6 @@ cpu_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cgrp)
|
||||
if (IS_ERR(tg))
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
/* Bind the cgroup to task_group object we just created */
|
||||
tg->css.cgroup = cgrp;
|
||||
|
||||
return &tg->css;
|
||||
}
|
||||
|
||||
|
||||
+51
-171
@@ -408,64 +408,6 @@ static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
return __sched_period(nr_running);
|
||||
}
|
||||
|
||||
/*
|
||||
* The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
|
||||
* that it favours >=0 over <0.
|
||||
*
|
||||
* -20 |
|
||||
* |
|
||||
* 0 --------+-------
|
||||
* .'
|
||||
* 19 .'
|
||||
*
|
||||
*/
|
||||
static unsigned long
|
||||
calc_delta_asym(unsigned long delta, struct sched_entity *se)
|
||||
{
|
||||
struct load_weight lw = {
|
||||
.weight = NICE_0_LOAD,
|
||||
.inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
|
||||
};
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
struct load_weight *se_lw = &se->load;
|
||||
unsigned long rw = cfs_rq_of(se)->load.weight;
|
||||
|
||||
#ifdef CONFIG_FAIR_SCHED_GROUP
|
||||
struct cfs_rq *cfs_rq = se->my_q;
|
||||
struct task_group *tg = NULL
|
||||
|
||||
if (cfs_rq)
|
||||
tg = cfs_rq->tg;
|
||||
|
||||
if (tg && tg->shares < NICE_0_LOAD) {
|
||||
/*
|
||||
* scale shares to what it would have been had
|
||||
* tg->weight been NICE_0_LOAD:
|
||||
*
|
||||
* weight = 1024 * shares / tg->weight
|
||||
*/
|
||||
lw.weight *= se->load.weight;
|
||||
lw.weight /= tg->shares;
|
||||
|
||||
lw.inv_weight = 0;
|
||||
|
||||
se_lw = &lw;
|
||||
rw += lw.weight - se->load.weight;
|
||||
} else
|
||||
#endif
|
||||
|
||||
if (se->load.weight < NICE_0_LOAD) {
|
||||
se_lw = &lw;
|
||||
rw += NICE_0_LOAD - se->load.weight;
|
||||
}
|
||||
|
||||
delta = calc_delta_mine(delta, rw, se_lw);
|
||||
}
|
||||
|
||||
return delta;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the current task's runtime statistics. Skip current tasks that
|
||||
* are not in our scheduling class.
|
||||
@@ -586,11 +528,12 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
update_load_add(&cfs_rq->load, se->load.weight);
|
||||
if (!parent_entity(se))
|
||||
inc_cpu_load(rq_of(cfs_rq), se->load.weight);
|
||||
if (entity_is_task(se))
|
||||
if (entity_is_task(se)) {
|
||||
add_cfs_task_weight(cfs_rq, se->load.weight);
|
||||
list_add(&se->group_node, &cfs_rq->tasks);
|
||||
}
|
||||
cfs_rq->nr_running++;
|
||||
se->on_rq = 1;
|
||||
list_add(&se->group_node, &cfs_rq->tasks);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -599,11 +542,12 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
update_load_sub(&cfs_rq->load, se->load.weight);
|
||||
if (!parent_entity(se))
|
||||
dec_cpu_load(rq_of(cfs_rq), se->load.weight);
|
||||
if (entity_is_task(se))
|
||||
if (entity_is_task(se)) {
|
||||
add_cfs_task_weight(cfs_rq, -se->load.weight);
|
||||
list_del_init(&se->group_node);
|
||||
}
|
||||
cfs_rq->nr_running--;
|
||||
se->on_rq = 0;
|
||||
list_del_init(&se->group_node);
|
||||
}
|
||||
|
||||
static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
|
||||
@@ -1085,7 +1029,6 @@ static long effective_load(struct task_group *tg, int cpu,
|
||||
long wl, long wg)
|
||||
{
|
||||
struct sched_entity *se = tg->se[cpu];
|
||||
long more_w;
|
||||
|
||||
if (!tg->parent)
|
||||
return wl;
|
||||
@@ -1097,18 +1040,17 @@ static long effective_load(struct task_group *tg, int cpu,
|
||||
if (!wl && sched_feat(ASYM_EFF_LOAD))
|
||||
return wl;
|
||||
|
||||
/*
|
||||
* Instead of using this increment, also add the difference
|
||||
* between when the shares were last updated and now.
|
||||
*/
|
||||
more_w = se->my_q->load.weight - se->my_q->rq_weight;
|
||||
wl += more_w;
|
||||
wg += more_w;
|
||||
|
||||
for_each_sched_entity(se) {
|
||||
#define D(n) (likely(n) ? (n) : 1)
|
||||
|
||||
long S, rw, s, a, b;
|
||||
long more_w;
|
||||
|
||||
/*
|
||||
* Instead of using this increment, also add the difference
|
||||
* between when the shares were last updated and now.
|
||||
*/
|
||||
more_w = se->my_q->load.weight - se->my_q->rq_weight;
|
||||
wl += more_w;
|
||||
wg += more_w;
|
||||
|
||||
S = se->my_q->tg->shares;
|
||||
s = se->my_q->shares;
|
||||
@@ -1117,7 +1059,11 @@ static long effective_load(struct task_group *tg, int cpu,
|
||||
a = S*(rw + wl);
|
||||
b = S*rw + s*wg;
|
||||
|
||||
wl = s*(a-b)/D(b);
|
||||
wl = s*(a-b);
|
||||
|
||||
if (likely(b))
|
||||
wl /= b;
|
||||
|
||||
/*
|
||||
* Assume the group is already running and will
|
||||
* thus already be accounted for in the weight.
|
||||
@@ -1126,7 +1072,6 @@ static long effective_load(struct task_group *tg, int cpu,
|
||||
* alter the group weight.
|
||||
*/
|
||||
wg = 0;
|
||||
#undef D
|
||||
}
|
||||
|
||||
return wl;
|
||||
@@ -1143,7 +1088,7 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
|
||||
#endif
|
||||
|
||||
static int
|
||||
wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
|
||||
wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
|
||||
struct task_struct *p, int prev_cpu, int this_cpu, int sync,
|
||||
int idx, unsigned long load, unsigned long this_load,
|
||||
unsigned int imbalance)
|
||||
@@ -1191,8 +1136,8 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
|
||||
schedstat_inc(p, se.nr_wakeups_affine_attempts);
|
||||
tl_per_task = cpu_avg_load_per_task(this_cpu);
|
||||
|
||||
if ((tl <= load && tl + target_load(prev_cpu, idx) <= tl_per_task) ||
|
||||
balanced) {
|
||||
if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <=
|
||||
tl_per_task)) {
|
||||
/*
|
||||
* This domain has SD_WAKE_AFFINE and
|
||||
* p is cache cold in this domain, and
|
||||
@@ -1211,16 +1156,17 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
|
||||
struct sched_domain *sd, *this_sd = NULL;
|
||||
int prev_cpu, this_cpu, new_cpu;
|
||||
unsigned long load, this_load;
|
||||
struct rq *rq, *this_rq;
|
||||
struct rq *this_rq;
|
||||
unsigned int imbalance;
|
||||
int idx;
|
||||
|
||||
prev_cpu = task_cpu(p);
|
||||
rq = task_rq(p);
|
||||
this_cpu = smp_processor_id();
|
||||
this_rq = cpu_rq(this_cpu);
|
||||
new_cpu = prev_cpu;
|
||||
|
||||
if (prev_cpu == this_cpu)
|
||||
goto out;
|
||||
/*
|
||||
* 'this_sd' is the first domain that both
|
||||
* this_cpu and prev_cpu are present in:
|
||||
@@ -1248,13 +1194,10 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
|
||||
load = source_load(prev_cpu, idx);
|
||||
this_load = target_load(this_cpu, idx);
|
||||
|
||||
if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
|
||||
if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
|
||||
load, this_load, imbalance))
|
||||
return this_cpu;
|
||||
|
||||
if (prev_cpu == this_cpu)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Start passive balancing when half the imbalance_pct
|
||||
* limit is reached.
|
||||
@@ -1281,62 +1224,20 @@ static unsigned long wakeup_gran(struct sched_entity *se)
|
||||
* + nice tasks.
|
||||
*/
|
||||
if (sched_feat(ASYM_GRAN))
|
||||
gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se);
|
||||
else
|
||||
gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
|
||||
gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
|
||||
|
||||
return gran;
|
||||
}
|
||||
|
||||
/*
|
||||
* Should 'se' preempt 'curr'.
|
||||
*
|
||||
* |s1
|
||||
* |s2
|
||||
* |s3
|
||||
* g
|
||||
* |<--->|c
|
||||
*
|
||||
* w(c, s1) = -1
|
||||
* w(c, s2) = 0
|
||||
* w(c, s3) = 1
|
||||
*
|
||||
*/
|
||||
static int
|
||||
wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
|
||||
{
|
||||
s64 gran, vdiff = curr->vruntime - se->vruntime;
|
||||
|
||||
if (vdiff < 0)
|
||||
return -1;
|
||||
|
||||
gran = wakeup_gran(curr);
|
||||
if (vdiff > gran)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* return depth at which a sched entity is present in the hierarchy */
|
||||
static inline int depth_se(struct sched_entity *se)
|
||||
{
|
||||
int depth = 0;
|
||||
|
||||
for_each_sched_entity(se)
|
||||
depth++;
|
||||
|
||||
return depth;
|
||||
}
|
||||
|
||||
/*
|
||||
* Preempt the current task with a newly woken task if needed:
|
||||
*/
|
||||
static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
|
||||
static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
|
||||
{
|
||||
struct task_struct *curr = rq->curr;
|
||||
struct cfs_rq *cfs_rq = task_cfs_rq(curr);
|
||||
struct sched_entity *se = &curr->se, *pse = &p->se;
|
||||
int se_depth, pse_depth;
|
||||
s64 delta_exec;
|
||||
|
||||
if (unlikely(rt_prio(p->prio))) {
|
||||
update_rq_clock(rq);
|
||||
@@ -1350,6 +1251,13 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
|
||||
|
||||
cfs_rq_of(pse)->next = pse;
|
||||
|
||||
/*
|
||||
* We can come here with TIF_NEED_RESCHED already set from new task
|
||||
* wake up path.
|
||||
*/
|
||||
if (test_tsk_need_resched(curr))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Batch tasks do not preempt (their preemption is driven by
|
||||
* the tick):
|
||||
@@ -1360,33 +1268,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
|
||||
if (!sched_feat(WAKEUP_PREEMPT))
|
||||
return;
|
||||
|
||||
/*
|
||||
* preemption test can be made between sibling entities who are in the
|
||||
* same cfs_rq i.e who have a common parent. Walk up the hierarchy of
|
||||
* both tasks until we find their ancestors who are siblings of common
|
||||
* parent.
|
||||
*/
|
||||
|
||||
/* First walk up until both entities are at same depth */
|
||||
se_depth = depth_se(se);
|
||||
pse_depth = depth_se(pse);
|
||||
|
||||
while (se_depth > pse_depth) {
|
||||
se_depth--;
|
||||
se = parent_entity(se);
|
||||
if (sched_feat(WAKEUP_OVERLAP) && sync &&
|
||||
se->avg_overlap < sysctl_sched_migration_cost &&
|
||||
pse->avg_overlap < sysctl_sched_migration_cost) {
|
||||
resched_task(curr);
|
||||
return;
|
||||
}
|
||||
|
||||
while (pse_depth > se_depth) {
|
||||
pse_depth--;
|
||||
pse = parent_entity(pse);
|
||||
}
|
||||
|
||||
while (!is_same_group(se, pse)) {
|
||||
se = parent_entity(se);
|
||||
pse = parent_entity(pse);
|
||||
}
|
||||
|
||||
if (wakeup_preempt_entity(se, pse) == 1)
|
||||
delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
|
||||
if (delta_exec > wakeup_gran(pse))
|
||||
resched_task(curr);
|
||||
}
|
||||
|
||||
@@ -1445,19 +1335,9 @@ __load_balance_iterator(struct cfs_rq *cfs_rq, struct list_head *next)
|
||||
if (next == &cfs_rq->tasks)
|
||||
return NULL;
|
||||
|
||||
/* Skip over entities that are not tasks */
|
||||
do {
|
||||
se = list_entry(next, struct sched_entity, group_node);
|
||||
next = next->next;
|
||||
} while (next != &cfs_rq->tasks && !entity_is_task(se));
|
||||
|
||||
if (next == &cfs_rq->tasks)
|
||||
return NULL;
|
||||
|
||||
cfs_rq->balance_iterator = next;
|
||||
|
||||
if (entity_is_task(se))
|
||||
p = task_of(se);
|
||||
se = list_entry(next, struct sched_entity, group_node);
|
||||
p = task_of(se);
|
||||
cfs_rq->balance_iterator = next->next;
|
||||
|
||||
return p;
|
||||
}
|
||||
@@ -1507,7 +1387,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
||||
rcu_read_lock();
|
||||
update_h_load(busiest_cpu);
|
||||
|
||||
list_for_each_entry(tg, &task_groups, list) {
|
||||
list_for_each_entry_rcu(tg, &task_groups, list) {
|
||||
struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
|
||||
unsigned long busiest_h_load = busiest_cfs_rq->h_load;
|
||||
unsigned long busiest_weight = busiest_cfs_rq->load.weight;
|
||||
@@ -1620,10 +1500,10 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
|
||||
* 'current' within the tree based on its new key value.
|
||||
*/
|
||||
swap(curr->vruntime, se->vruntime);
|
||||
resched_task(rq->curr);
|
||||
}
|
||||
|
||||
enqueue_task_fair(rq, p, 0);
|
||||
resched_task(rq->curr);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1642,7 +1522,7 @@ static void prio_changed_fair(struct rq *rq, struct task_struct *p,
|
||||
if (p->prio > oldprio)
|
||||
resched_task(rq->curr);
|
||||
} else
|
||||
check_preempt_curr(rq, p);
|
||||
check_preempt_curr(rq, p, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1659,7 +1539,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p,
|
||||
if (running)
|
||||
resched_task(rq->curr);
|
||||
else
|
||||
check_preempt_curr(rq, p);
|
||||
check_preempt_curr(rq, p, 0);
|
||||
}
|
||||
|
||||
/* Account for a task changing its policy or group.
|
||||
|
||||
@@ -11,3 +11,4 @@ SCHED_FEAT(ASYM_GRAN, 1)
|
||||
SCHED_FEAT(LB_BIAS, 1)
|
||||
SCHED_FEAT(LB_WAKEUP_UPDATE, 1)
|
||||
SCHED_FEAT(ASYM_EFF_LOAD, 1)
|
||||
SCHED_FEAT(WAKEUP_OVERLAP, 0)
|
||||
|
||||
@@ -14,7 +14,7 @@ static int select_task_rq_idle(struct task_struct *p, int sync)
|
||||
/*
|
||||
* Idle tasks are unconditionally rescheduled:
|
||||
*/
|
||||
static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
|
||||
static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync)
|
||||
{
|
||||
resched_task(rq->idle);
|
||||
}
|
||||
@@ -76,7 +76,7 @@ static void switched_to_idle(struct rq *rq, struct task_struct *p,
|
||||
if (running)
|
||||
resched_task(rq->curr);
|
||||
else
|
||||
check_preempt_curr(rq, p);
|
||||
check_preempt_curr(rq, p, 0);
|
||||
}
|
||||
|
||||
static void prio_changed_idle(struct rq *rq, struct task_struct *p,
|
||||
@@ -93,7 +93,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
|
||||
if (p->prio > oldprio)
|
||||
resched_task(rq->curr);
|
||||
} else
|
||||
check_preempt_curr(rq, p);
|
||||
check_preempt_curr(rq, p, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
+51
-6
@@ -102,12 +102,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
|
||||
|
||||
static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
|
||||
{
|
||||
struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
|
||||
struct sched_rt_entity *rt_se = rt_rq->rt_se;
|
||||
|
||||
if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) {
|
||||
struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
|
||||
|
||||
enqueue_rt_entity(rt_se);
|
||||
if (rt_rq->rt_nr_running) {
|
||||
if (rt_se && !on_rt_rq(rt_se))
|
||||
enqueue_rt_entity(rt_se);
|
||||
if (rt_rq->highest_prio < curr->prio)
|
||||
resched_task(curr);
|
||||
}
|
||||
@@ -231,6 +231,9 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
|
||||
#endif /* CONFIG_RT_GROUP_SCHED */
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/*
|
||||
* We ran out of runtime, see if we can borrow some from our neighbours.
|
||||
*/
|
||||
static int do_balance_runtime(struct rt_rq *rt_rq)
|
||||
{
|
||||
struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
|
||||
@@ -250,9 +253,18 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
|
||||
continue;
|
||||
|
||||
spin_lock(&iter->rt_runtime_lock);
|
||||
/*
|
||||
* Either all rqs have inf runtime and there's nothing to steal
|
||||
* or __disable_runtime() below sets a specific rq to inf to
|
||||
* indicate its been disabled and disalow stealing.
|
||||
*/
|
||||
if (iter->rt_runtime == RUNTIME_INF)
|
||||
goto next;
|
||||
|
||||
/*
|
||||
* From runqueues with spare time, take 1/n part of their
|
||||
* spare time, but no more than our period.
|
||||
*/
|
||||
diff = iter->rt_runtime - iter->rt_time;
|
||||
if (diff > 0) {
|
||||
diff = div_u64((u64)diff, weight);
|
||||
@@ -274,6 +286,9 @@ next:
|
||||
return more;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure this RQ takes back all the runtime it lend to its neighbours.
|
||||
*/
|
||||
static void __disable_runtime(struct rq *rq)
|
||||
{
|
||||
struct root_domain *rd = rq->rd;
|
||||
@@ -289,17 +304,33 @@ static void __disable_runtime(struct rq *rq)
|
||||
|
||||
spin_lock(&rt_b->rt_runtime_lock);
|
||||
spin_lock(&rt_rq->rt_runtime_lock);
|
||||
/*
|
||||
* Either we're all inf and nobody needs to borrow, or we're
|
||||
* already disabled and thus have nothing to do, or we have
|
||||
* exactly the right amount of runtime to take out.
|
||||
*/
|
||||
if (rt_rq->rt_runtime == RUNTIME_INF ||
|
||||
rt_rq->rt_runtime == rt_b->rt_runtime)
|
||||
goto balanced;
|
||||
spin_unlock(&rt_rq->rt_runtime_lock);
|
||||
|
||||
/*
|
||||
* Calculate the difference between what we started out with
|
||||
* and what we current have, that's the amount of runtime
|
||||
* we lend and now have to reclaim.
|
||||
*/
|
||||
want = rt_b->rt_runtime - rt_rq->rt_runtime;
|
||||
|
||||
/*
|
||||
* Greedy reclaim, take back as much as we can.
|
||||
*/
|
||||
for_each_cpu_mask(i, rd->span) {
|
||||
struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
|
||||
s64 diff;
|
||||
|
||||
/*
|
||||
* Can't reclaim from ourselves or disabled runqueues.
|
||||
*/
|
||||
if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
|
||||
continue;
|
||||
|
||||
@@ -319,8 +350,16 @@ static void __disable_runtime(struct rq *rq)
|
||||
}
|
||||
|
||||
spin_lock(&rt_rq->rt_runtime_lock);
|
||||
/*
|
||||
* We cannot be left wanting - that would mean some runtime
|
||||
* leaked out of the system.
|
||||
*/
|
||||
BUG_ON(want);
|
||||
balanced:
|
||||
/*
|
||||
* Disable all the borrow logic by pretending we have inf
|
||||
* runtime - in which case borrowing doesn't make sense.
|
||||
*/
|
||||
rt_rq->rt_runtime = RUNTIME_INF;
|
||||
spin_unlock(&rt_rq->rt_runtime_lock);
|
||||
spin_unlock(&rt_b->rt_runtime_lock);
|
||||
@@ -343,6 +382,9 @@ static void __enable_runtime(struct rq *rq)
|
||||
if (unlikely(!scheduler_running))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Reset each runqueue's bandwidth settings
|
||||
*/
|
||||
for_each_leaf_rt_rq(rt_rq, rq) {
|
||||
struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
|
||||
|
||||
@@ -389,7 +431,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
|
||||
int i, idle = 1;
|
||||
cpumask_t span;
|
||||
|
||||
if (rt_b->rt_runtime == RUNTIME_INF)
|
||||
if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
|
||||
return 1;
|
||||
|
||||
span = sched_rt_period_mask();
|
||||
@@ -487,6 +529,9 @@ static void update_curr_rt(struct rq *rq)
|
||||
curr->se.exec_start = rq->clock;
|
||||
cpuacct_charge(curr, delta_exec);
|
||||
|
||||
if (!rt_bandwidth_enabled())
|
||||
return;
|
||||
|
||||
for_each_sched_rt_entity(rt_se) {
|
||||
rt_rq = rt_rq_of_se(rt_se);
|
||||
|
||||
@@ -784,7 +829,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
|
||||
/*
|
||||
* Preempt the current task with a newly woken task if needed:
|
||||
*/
|
||||
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
|
||||
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync)
|
||||
{
|
||||
if (p->prio < rq->curr->prio) {
|
||||
resched_task(rq->curr);
|
||||
|
||||
+2
-2
@@ -169,7 +169,7 @@ static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
|
||||
{
|
||||
struct user_struct *up = container_of(kobj, struct user_struct, kobj);
|
||||
|
||||
return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg));
|
||||
return sprintf(buf, "%ld\n", sched_group_rt_runtime(up->tg));
|
||||
}
|
||||
|
||||
static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
|
||||
@@ -180,7 +180,7 @@ static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
|
||||
unsigned long rt_runtime;
|
||||
int rc;
|
||||
|
||||
sscanf(buf, "%lu", &rt_runtime);
|
||||
sscanf(buf, "%ld", &rt_runtime);
|
||||
|
||||
rc = sched_group_set_rt_runtime(up->tg, rt_runtime);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user