It was found when doing a hotplug stress test on POWER, that the machine
either hit softlockups or rcu_sched stall warnings. The issue was traced
to commit:

  7cba160ad7 ("powernv/cpuidle: Redesign idle states management")

which exposed the cpu_down() race with hrtimer based broadcast mode:

  5d1638acb9 ("tick: Introduce hrtimer based broadcast")

The race is the following:

Assume CPU1 is the CPU which holds the hrtimer broadcasting duty before
it is taken down.

    CPU0                                    CPU1

    cpu_down()                              take_cpu_down()
                                            disable_interrupts()

    cpu_die()

    while (CPU1 != CPU_DEAD) {
            msleep(100);
            switch_to_idle();
            stop_cpu_timer();
            schedule_broadcast();
    }

    tick_cleanup_cpu_dead()
            take_over_broadcast()

So after CPU1 disabled interrupts it cannot handle the broadcast hrtimer
anymore, so CPU0 will be stuck forever.

Fix this by explicitly taking over broadcast duty before cpu_die().

This is a temporary workaround. What we really want is a callback
in the clockevent device which allows us to do that from the dying
CPU by pushing the hrtimer onto a different cpu. That might involve
an IPI and is definitely more complex than this immediate fix.

Changelog was picked up from:

    https://lkml.org/lkml/2015/2/16/213

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Preeti U. Murthy <preeti@linux.vnet.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: mpe@ellerman.id.au
Cc: nicolas.pitre@linaro.org
Cc: peterz@infradead.org
Cc: rjw@rjwysocki.net
Fixes: http://linuxppc.10917.n7.nabble.com/offlining-cpus-breakage-td88619.html
Link: http://lkml.kernel.org/r/20150330092410.24979.59887.stgit@preeti.in.ibm.com
[ Merged it to the latest timer tree, renamed the callback, tidied up the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
135 lines
3.8 KiB
C
135 lines
3.8 KiB
C
/*
 * Tick related global functions
 */
|
|
#ifndef _LINUX_TICK_H
|
|
#define _LINUX_TICK_H
|
|
|
|
#include <linux/clockchips.h>
|
|
#include <linux/irqflags.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/context_tracking_state.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/sched.h>
|
|
|
|
/*
 * Core tick entry points.
 *
 * With CONFIG_GENERIC_CLOCKEVENTS the functions are only declared here and
 * defined elsewhere; without it every entry point collapses to an empty
 * inline stub so callers need no #ifdef of their own.
 */
#ifdef CONFIG_GENERIC_CLOCKEVENTS
extern void __init tick_init(void);
extern void tick_freeze(void);
extern void tick_unfreeze(void);
/* Should be core only, but ARM BL switcher requires it */
extern void tick_suspend_local(void);
/* Should be core only, but XEN resume magic and ARM BL switcher require it */
extern void tick_resume_local(void);
#else /* CONFIG_GENERIC_CLOCKEVENTS */
static inline void tick_init(void) { }
static inline void tick_freeze(void) { }
static inline void tick_unfreeze(void) { }
static inline void tick_suspend_local(void) { }
static inline void tick_resume_local(void) { }
#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
|
|
|
|
/*
 * tick_irq_enter() is a real function only with CONFIG_TICK_ONESHOT;
 * otherwise it is an empty inline stub.
 *
 * arch_needs_cpu() defaults to 0 unless something has already defined it
 * before this point (presumably an architecture header -- confirm against
 * the arch code). Note that the default is only provided in the
 * CONFIG_TICK_ONESHOT case.
 */
#ifdef CONFIG_TICK_ONESHOT
extern void tick_irq_enter(void);
# ifndef arch_needs_cpu
#  define arch_needs_cpu() (0)
# endif
#else /* !CONFIG_TICK_ONESHOT */
static inline void tick_irq_enter(void) { }
#endif /* CONFIG_TICK_ONESHOT */
|
|
|
|
/*
 * hotplug_cpu__broadcast_tick_pull - CPU hotplug hook for the oneshot
 * broadcast tick.
 * @dead_cpu: CPU number passed by the hotplug caller (per the parameter
 *            name, the CPU that is going away).
 *
 * Only a real function when both the clockevents broadcast code and oneshot
 * tick support are configured; otherwise an empty stub.
 *
 * NOTE(review): according to the changelog that introduced this hook, it lets
 * a surviving CPU explicitly take over the hrtimer broadcast duty from the
 * dying CPU before cpu_die() -- confirm against the implementation.
 */
#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
extern void hotplug_cpu__broadcast_tick_pull(int dead_cpu);
#else
static inline void hotplug_cpu__broadcast_tick_pull(int dead_cpu) { }
#endif
|
|
|
|
#ifdef CONFIG_NO_HZ_COMMON
|
|
extern int tick_nohz_tick_stopped(void);
|
|
extern void tick_nohz_idle_enter(void);
|
|
extern void tick_nohz_idle_exit(void);
|
|
extern void tick_nohz_irq_exit(void);
|
|
extern ktime_t tick_nohz_get_sleep_length(void);
|
|
extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
|
|
extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
|
|
#else /* !CONFIG_NO_HZ_COMMON */
|
|
static inline int tick_nohz_tick_stopped(void) { return 0; }
|
|
static inline void tick_nohz_idle_enter(void) { }
|
|
static inline void tick_nohz_idle_exit(void) { }
|
|
|
|
static inline ktime_t tick_nohz_get_sleep_length(void)
|
|
{
|
|
ktime_t len = { .tv64 = NSEC_PER_SEC/HZ };
|
|
|
|
return len;
|
|
}
|
|
static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
|
|
static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
|
|
#endif /* !CONFIG_NO_HZ_COMMON */
|
|
|
|
/*
 * Full dynticks (NO_HZ_FULL) interface.
 *
 * With CONFIG_NO_HZ_FULL the state variables and out-of-line helpers are
 * declared here and defined elsewhere; without it every helper becomes an
 * inline stub that reports "disabled" or does nothing.
 */
#ifdef CONFIG_NO_HZ_FULL
extern bool tick_nohz_full_running;
extern cpumask_var_t tick_nohz_full_mask;
extern cpumask_var_t housekeeping_mask;

/*
 * Returns false while context tracking is not enabled; otherwise returns
 * the current value of tick_nohz_full_running.
 */
static inline bool tick_nohz_full_enabled(void)
{
	if (!context_tracking_is_enabled())
		return false;

	return tick_nohz_full_running;
}

/*
 * Returns true only when full dynticks is enabled and @cpu is set in
 * tick_nohz_full_mask. The enabled check comes first, so the mask is not
 * consulted while full dynticks is disabled.
 */
static inline bool tick_nohz_full_cpu(int cpu)
{
	if (!tick_nohz_full_enabled())
		return false;

	return cpumask_test_cpu(cpu, tick_nohz_full_mask);
}

extern void __tick_nohz_full_check(void);
extern void tick_nohz_full_kick(void);
extern void tick_nohz_full_kick_cpu(int cpu);
extern void tick_nohz_full_kick_all(void);
extern void __tick_nohz_task_switch(struct task_struct *tsk);
#else /* !CONFIG_NO_HZ_FULL */
static inline bool tick_nohz_full_enabled(void) { return false; }
static inline bool tick_nohz_full_cpu(int cpu) { return false; }
static inline void __tick_nohz_full_check(void) { }
static inline void tick_nohz_full_kick(void) { }
static inline void tick_nohz_full_kick_cpu(int cpu) { }
static inline void tick_nohz_full_kick_all(void) { }
static inline void __tick_nohz_task_switch(struct task_struct *tsk) { }
#endif /* CONFIG_NO_HZ_FULL */
|
|
|
|
/*
 * is_housekeeping_cpu - test whether @cpu counts as a housekeeping CPU.
 * @cpu: CPU number to test.
 *
 * When CONFIG_NO_HZ_FULL is built in and full dynticks is enabled, the
 * answer is @cpu's membership in housekeeping_mask. In every other case
 * (option not configured, or full dynticks not enabled) the function
 * returns true for any @cpu.
 */
static inline bool is_housekeeping_cpu(int cpu)
{
#ifdef CONFIG_NO_HZ_FULL
	if (tick_nohz_full_enabled())
		return cpumask_test_cpu(cpu, housekeeping_mask);
#endif
	return true;
}
|
|
|
|
/*
 * housekeeping_affine - apply housekeeping_mask as the allowed CPUs of a task.
 * @t: task passed to set_cpus_allowed_ptr().
 *
 * When CONFIG_NO_HZ_FULL is built in and full dynticks is enabled, calls
 * set_cpus_allowed_ptr(@t, housekeeping_mask); the result of that call is
 * not checked. Otherwise this is a no-op.
 */
static inline void housekeeping_affine(struct task_struct *t)
{
#ifdef CONFIG_NO_HZ_FULL
	if (tick_nohz_full_enabled())
		set_cpus_allowed_ptr(t, housekeeping_mask);
#endif
}
|
|
|
|
/*
 * tick_nohz_full_check - guarded wrapper around __tick_nohz_full_check().
 *
 * Calls __tick_nohz_full_check() only when tick_nohz_full_enabled() returns
 * true; otherwise does nothing.
 */
static inline void tick_nohz_full_check(void)
{
	if (tick_nohz_full_enabled())
		__tick_nohz_full_check();
}
|
|
|
|
/*
 * tick_nohz_task_switch - guarded wrapper around __tick_nohz_task_switch().
 * @tsk: task forwarded unchanged to __tick_nohz_task_switch().
 *
 * Calls __tick_nohz_task_switch(@tsk) only when tick_nohz_full_enabled()
 * returns true; otherwise does nothing.
 */
static inline void tick_nohz_task_switch(struct task_struct *tsk)
{
	if (tick_nohz_full_enabled())
		__tick_nohz_task_switch(tsk);
}
|
|
|
|
#endif
|