Files
tegra-linux-noble/include/linux/tick.h
Preeti U Murthy 345527b1ed clockevents: Fix cpu_down() race for hrtimer based broadcasting
It was found when doing a hotplug stress test on POWER, that the
machine either hit softlockups or rcu_sched stall warnings.  The
issue was traced to commit:

  7cba160ad7 ("powernv/cpuidle: Redesign idle states management")

which exposed the cpu_down() race with hrtimer based broadcast mode:

  5d1638acb9 ("tick: Introduce hrtimer based broadcast")

The race is the following:

Assume CPU1 is the CPU which holds the hrtimer broadcasting duty
before it is taken down.

	CPU0					CPU1

	cpu_down()				take_cpu_down()
						disable_interrupts()

	cpu_die()

	while (CPU1 != CPU_DEAD) {
		msleep(100);
		switch_to_idle();
		stop_cpu_timer();
		schedule_broadcast();
	}

	tick_cleanup_cpu_dead()
		take_over_broadcast()

So after CPU1 disabled interrupts it cannot handle the broadcast
hrtimer anymore, so CPU0 will be stuck forever.

Fix this by explicitly taking over broadcast duty before cpu_die().

This is a temporary workaround. What we really want is a callback
in the clockevent device which allows us to do that from the dying
CPU by pushing the hrtimer onto a different cpu. That might involve
an IPI and is definitely more complex than this immediate fix.

Changelog was picked up from:

    https://lkml.org/lkml/2015/2/16/213

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Preeti U. Murthy <preeti@linux.vnet.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: mpe@ellerman.id.au
Cc: nicolas.pitre@linaro.org
Cc: peterz@infradead.org
Cc: rjw@rjwysocki.net
Fixes: http://linuxppc.10917.n7.nabble.com/offlining-cpus-breakage-td88619.html
Link: http://lkml.kernel.org/r/20150330092410.24979.59887.stgit@preeti.in.ibm.com
[ Merged it to the latest timer tree, renamed the callback, tidied up the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2015-04-02 14:25:39 +02:00

135 lines
3.8 KiB
C

/*
* Tick related global functions
*/
#ifndef _LINUX_TICK_H
#define _LINUX_TICK_H
#include <linux/clockchips.h>
#include <linux/irqflags.h>
#include <linux/percpu.h>
#include <linux/context_tracking_state.h>
#include <linux/cpumask.h>
#include <linux/sched.h>
#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern void __init tick_init(void);
extern void tick_freeze(void);
extern void tick_unfreeze(void);
/* Should be core only, but ARM BL switcher requires it */
extern void tick_suspend_local(void);
/* Should be core only, but XEN resume magic and ARM BL switcher require it */
extern void tick_resume_local(void);
#else /* CONFIG_GENERIC_CLOCKEVENTS */
static inline void tick_init(void) { }
static inline void tick_freeze(void) { }
static inline void tick_unfreeze(void) { }
static inline void tick_suspend_local(void) { }
static inline void tick_resume_local(void) { }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
#ifdef CONFIG_TICK_ONESHOT
extern void tick_irq_enter(void);
# ifndef arch_needs_cpu
# define arch_needs_cpu() (0)
# endif
# else
static inline void tick_irq_enter(void) { }
#endif
#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
extern void hotplug_cpu__broadcast_tick_pull(int dead_cpu);
#else
static inline void hotplug_cpu__broadcast_tick_pull(int dead_cpu) { }
#endif
#ifdef CONFIG_NO_HZ_COMMON
extern int tick_nohz_tick_stopped(void);
extern void tick_nohz_idle_enter(void);
extern void tick_nohz_idle_exit(void);
extern void tick_nohz_irq_exit(void);
extern ktime_t tick_nohz_get_sleep_length(void);
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
#else /* !CONFIG_NO_HZ_COMMON */
static inline int tick_nohz_tick_stopped(void) { return 0; }
static inline void tick_nohz_idle_enter(void) { }
static inline void tick_nohz_idle_exit(void) { }
static inline ktime_t tick_nohz_get_sleep_length(void)
{
ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
return len;
}
static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
#endif /* !CONFIG_NO_HZ_COMMON */
#ifdef CONFIG_NO_HZ_FULL
extern bool tick_nohz_full_running;
extern cpumask_var_t tick_nohz_full_mask;
extern cpumask_var_t housekeeping_mask;
static inline bool tick_nohz_full_enabled(void)
{
if (!context_tracking_is_enabled())
return false;
return tick_nohz_full_running;
}
static inline bool tick_nohz_full_cpu(int cpu)
{
if (!tick_nohz_full_enabled())
return false;
return cpumask_test_cpu(cpu, tick_nohz_full_mask);
}
extern void __tick_nohz_full_check(void);
extern void tick_nohz_full_kick(void);
extern void tick_nohz_full_kick_cpu(int cpu);
extern void tick_nohz_full_kick_all(void);
extern void __tick_nohz_task_switch(struct task_struct *tsk);
#else
static inline bool tick_nohz_full_enabled(void) { return false; }
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
static inline void __tick_nohz_full_check(void) { }
static inline void tick_nohz_full_kick_cpu(int cpu) { }
static inline void tick_nohz_full_kick(void) { }
static inline void tick_nohz_full_kick_all(void) { }
static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
#endif
static inline bool is_housekeeping_cpu(int cpu)
{
#ifdef CONFIG_NO_HZ_FULL
if (tick_nohz_full_enabled())
return cpumask_test_cpu(cpu, housekeeping_mask);
#endif
return true;
}
static inline void housekeeping_affine(struct task_struct *t)
{
#ifdef CONFIG_NO_HZ_FULL
if (tick_nohz_full_enabled())
set_cpus_allowed_ptr(t, housekeeping_mask);
#endif
}
static inline void tick_nohz_full_check(void)
{
if (tick_nohz_full_enabled())
__tick_nohz_full_check();
}
static inline void tick_nohz_task_switch(struct task_struct *tsk)
{
if (tick_nohz_full_enabled())
__tick_nohz_task_switch(tsk);
}
#endif