Merge de10553fce ("Merge tag 'x86-apic-2023-04-24' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip") into android-mainline

Steps on the way to 6.4-rc1

Change-Id: I760dbae6c59b709b4eb2ab689593d2c3e78b37b5
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
This commit is contained in:
Greg Kroah-Hartman
2023-05-16 11:36:47 +00:00
37 changed files with 569 additions and 307 deletions
+1 -1
View File
@@ -14748,7 +14748,7 @@ F: include/uapi/linux/nitro_enclaves.h
F: samples/nitro_enclaves/
NOHZ, DYNTICKS SUPPORT
M: Frederic Weisbecker <fweisbec@gmail.com>
M: Frederic Weisbecker <frederic@kernel.org>
M: Thomas Gleixner <tglx@linutronix.de>
M: Ingo Molnar <mingo@kernel.org>
L: linux-kernel@vger.kernel.org
+1 -3
View File
@@ -1,8 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
# the inclusion of generic Makefile.
ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
hostprogs := vdsomunge
+1 -3
View File
@@ -6,9 +6,7 @@
# Heavily based on the vDSO Makefiles for other archs.
#
# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
# the inclusion of generic Makefile.
ARCH_REL_TYPE_ABS := R_AARCH64_JUMP_SLOT|R_AARCH64_GLOB_DAT|R_AARCH64_ABS64
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
obj-vdso := vgettimeofday.o note.o sigreturn.o
-3
View File
@@ -3,9 +3,6 @@
# Makefile for vdso32
#
# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
# the inclusion of generic Makefile.
ARCH_REL_TYPE_ABS := R_ARM_JUMP_SLOT|R_ARM_GLOB_DAT|R_ARM_ABS32
include $(srctree)/lib/vdso/Makefile
# Same as cc-*option, but using CC_COMPAT instead of CC
+1 -3
View File
@@ -1,8 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
# the inclusion of generic Makefile.
ARCH_REL_TYPE_ABS := R_CKCORE_ADDR32|R_CKCORE_JUMP_SLOT
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
# Symbols present in the vdso
+1 -3
View File
@@ -1,9 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
# Objects to go into the VDSO.
# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
# the inclusion of generic Makefile.
ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o sigreturn.o
+1 -3
View File
@@ -4,9 +4,7 @@
# Sanitizer runtimes are unavailable and cannot be linked here.
KCSAN_SANITIZE := n
# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
# the inclusion of generic Makefile.
ARCH_REL_TYPE_ABS := R_MIPS_JUMP_SLOT|R_MIPS_GLOB_DAT
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
obj-vdso-y := elf.o vgettimeofday.o sigreturn.o
+1 -1
View File
@@ -2,7 +2,7 @@
# List of files in the vdso, has to be asm only for now
ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN|R_PPC_REL24
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o
+1 -3
View File
@@ -1,9 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
# Copied from arch/tile/kernel/vdso/Makefile
# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
# the inclusion of generic Makefile.
ARCH_REL_TYPE_ABS := R_RISCV_32|R_RISCV_64|R_RISCV_JUMP_SLOT
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
# Symbols present in the vdso
vdso-syms = rt_sigreturn
+1 -2
View File
@@ -2,9 +2,8 @@
# List of files in the vdso
KCOV_INSTRUMENT := n
ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE
ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
obj-vdso32 = vdso_user_wrapper-32.o note-32.o
+1 -2
View File
@@ -2,9 +2,8 @@
# List of files in the vdso
KCOV_INSTRUMENT := n
ARCH_REL_TYPE_ABS := R_390_COPY|R_390_GLOB_DAT|R_390_JMP_SLOT|R_390_RELATIVE
ARCH_REL_TYPE_ABS += R_390_GOT|R_390_PLT
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
obj-vdso64 = vdso_user_wrapper.o note.o
obj-cvdso64 = vdso64_generic.o getcpu.o
+4 -9
View File
@@ -13,7 +13,7 @@
#include <asm/coco.h>
#include <asm/processor.h>
static enum cc_vendor vendor __ro_after_init;
enum cc_vendor cc_vendor __ro_after_init;
static u64 cc_mask __ro_after_init;
static bool intel_cc_platform_has(enum cc_attr attr)
@@ -99,7 +99,7 @@ static bool amd_cc_platform_has(enum cc_attr attr)
bool cc_platform_has(enum cc_attr attr)
{
switch (vendor) {
switch (cc_vendor) {
case CC_VENDOR_AMD:
return amd_cc_platform_has(attr);
case CC_VENDOR_INTEL:
@@ -119,7 +119,7 @@ u64 cc_mkenc(u64 val)
* - for AMD, bit *set* means the page is encrypted
* - for AMD with vTOM and for Intel, *clear* means encrypted
*/
switch (vendor) {
switch (cc_vendor) {
case CC_VENDOR_AMD:
if (sev_status & MSR_AMD64_SNP_VTOM)
return val & ~cc_mask;
@@ -135,7 +135,7 @@ u64 cc_mkenc(u64 val)
u64 cc_mkdec(u64 val)
{
/* See comment in cc_mkenc() */
switch (vendor) {
switch (cc_vendor) {
case CC_VENDOR_AMD:
if (sev_status & MSR_AMD64_SNP_VTOM)
return val | cc_mask;
@@ -149,11 +149,6 @@ u64 cc_mkdec(u64 val)
}
EXPORT_SYMBOL_GPL(cc_mkdec);
__init void cc_set_vendor(enum cc_vendor v)
{
vendor = v;
}
__init void cc_set_mask(u64 mask)
{
cc_mask = mask;
+1 -4
View File
@@ -3,10 +3,7 @@
# Building vDSO images for x86.
#
# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
# the inclusion of generic Makefile.
ARCH_REL_TYPE_ABS := R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE|
ARCH_REL_TYPE_ABS += R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
# Sanitizer runtimes are unavailable and cannot be linked here.
+20 -3
View File
@@ -10,13 +10,30 @@ enum cc_vendor {
CC_VENDOR_INTEL,
};
void cc_set_vendor(enum cc_vendor v);
void cc_set_mask(u64 mask);
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
extern enum cc_vendor cc_vendor;
/* Return the current value of the global cc_vendor variable. */
static inline enum cc_vendor cc_get_vendor(void)
{
return cc_vendor;
}
/* Store @vendor in the global cc_vendor variable, which cc_get_vendor() returns. */
static inline void cc_set_vendor(enum cc_vendor vendor)
{
cc_vendor = vendor;
}
void cc_set_mask(u64 mask);
u64 cc_mkenc(u64 val);
u64 cc_mkdec(u64 val);
#else
/*
 * Stub for builds without CONFIG_ARCH_HAS_CC_PLATFORM: there is no cc_vendor
 * variable in this branch, so always report CC_VENDOR_NONE.
 */
static inline enum cc_vendor cc_get_vendor(void)
{
return CC_VENDOR_NONE;
}
/* Stub for builds without CONFIG_ARCH_HAS_CC_PLATFORM: @vendor is ignored. */
static inline void cc_set_vendor(enum cc_vendor vendor) { }
static inline u64 cc_mkenc(u64 val)
{
return val;
+5 -1
View File
@@ -647,7 +647,11 @@ static inline void spin_lock_prefetch(const void *x)
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
#else
#define INIT_THREAD { }
extern unsigned long __end_init_task[];
#define INIT_THREAD { \
.sp = (unsigned long)&__end_init_task - sizeof(struct pt_regs), \
}
extern unsigned long KSTK_ESP(struct task_struct *task);
-1
View File
@@ -59,7 +59,6 @@ extern struct real_mode_header *real_mode_header;
extern unsigned char real_mode_blob_end[];
extern unsigned long initial_code;
extern unsigned long initial_gs;
extern unsigned long initial_stack;
#ifdef CONFIG_AMD_MEM_ENCRYPT
extern unsigned long initial_vc_handler;
+4 -1
View File
@@ -199,5 +199,8 @@ extern void nmi_selftest(void);
#define nmi_selftest() do { } while (0)
#endif
#endif /* __ASSEMBLY__ */
extern unsigned int smpboot_control;
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_SMP_H */
+18 -5
View File
@@ -111,13 +111,26 @@ int x86_acpi_suspend_lowlevel(void)
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
#ifdef CONFIG_SMP
initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
(unsigned long)get_cpu_gdt_rw(smp_processor_id());
initial_gs = per_cpu_offset(smp_processor_id());
/*
* As each CPU starts up, it will find its own stack pointer
* from its current_task->thread.sp. Typically that will be
* the idle thread for a newly-started AP, or even the boot
* CPU which will find it set to &init_task in the static
* per-cpu data.
*
* Make the resuming CPU use the temporary stack at startup
* by setting current->thread.sp to point to that. The true
* %rsp will be restored with the rest of the CPU context,
* by do_suspend_lowlevel(). And unwinders don't care about
* the abuse of ->thread.sp because it's a dead variable
* while the thread is running on the CPU anyway; the true
* value is in the actual %rsp register.
*/
current->thread.sp = (unsigned long)temp_stack + sizeof(temp_stack);
smpboot_control = smp_processor_id();
#endif
initial_code = (unsigned long)wakeup_long64;
saved_magic = 0x123456789abcdef0L;
saved_magic = 0x123456789abcdef0L;
#endif /* CONFIG_64BIT */
/*
+2 -3
View File
@@ -422,10 +422,9 @@ static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
if (vector && !eilvt_entry_is_changeable(vector, new))
/* may not change if vectors are different */
return rsvd;
rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
} while (rsvd != new);
} while (!atomic_try_cmpxchg(&eilvt_offsets[offset], &rsvd, new));
rsvd &= ~APIC_EILVT_MASKED;
rsvd = new & ~APIC_EILVT_MASKED;
if (rsvd && rsvd != vector)
pr_info("LVT offset %d assigned for vector 0x%02x\n",
offset, rsvd);
+9 -5
View File
@@ -2478,17 +2478,21 @@ static int io_apic_get_redir_entries(int ioapic)
unsigned int arch_dynirq_lower_bound(unsigned int from)
{
unsigned int ret;
/*
* dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
* gsi_top if ioapic_dynirq_base hasn't been initialized yet.
*/
if (!ioapic_initialized)
return gsi_top;
ret = ioapic_dynirq_base ? : gsi_top;
/*
* For DT enabled machines ioapic_dynirq_base is irrelevant and not
* updated. So simply return @from if ioapic_dynirq_base == 0.
* For DT enabled machines ioapic_dynirq_base is irrelevant and
* always 0. gsi_top can be 0 if there is no IO/APIC registered.
* 0 is an invalid interrupt number for dynamic allocations. Return
* @from instead.
*/
return ioapic_dynirq_base ? : from;
return ret ? : from;
}
#ifdef CONFIG_X86_32
+85 -47
View File
@@ -9,11 +9,7 @@
#include "local.h"
struct cluster_mask {
unsigned int clusterid;
int node;
struct cpumask mask;
};
#define apic_cluster(apicid) ((apicid) >> 4)
/*
* __x2apic_send_IPI_mask() possibly needs to read
@@ -23,8 +19,7 @@ struct cluster_mask {
static u32 *x86_cpu_to_logical_apicid __read_mostly;
static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
static DEFINE_PER_CPU_READ_MOSTLY(struct cluster_mask *, cluster_masks);
static struct cluster_mask *cluster_hotplug_mask;
static DEFINE_PER_CPU_READ_MOSTLY(struct cpumask *, cluster_masks);
static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
@@ -60,10 +55,10 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
/* Collapse cpus in a cluster so a single IPI per cluster is sent */
for_each_cpu(cpu, tmpmsk) {
struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu);
struct cpumask *cmsk = per_cpu(cluster_masks, cpu);
dest = 0;
for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask)
for_each_cpu_and(clustercpu, tmpmsk, cmsk)
dest |= x86_cpu_to_logical_apicid[clustercpu];
if (!dest)
@@ -71,7 +66,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
__x2apic_send_IPI_dest(dest, vector, APIC_DEST_LOGICAL);
/* Remove cluster CPUs from tmpmask */
cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask);
cpumask_andnot(tmpmsk, tmpmsk, cmsk);
}
local_irq_restore(flags);
@@ -105,55 +100,98 @@ static u32 x2apic_calc_apicid(unsigned int cpu)
static void init_x2apic_ldr(void)
{
struct cluster_mask *cmsk = this_cpu_read(cluster_masks);
u32 cluster, apicid = apic_read(APIC_LDR);
unsigned int cpu;
struct cpumask *cmsk = this_cpu_read(cluster_masks);
x86_cpu_to_logical_apicid[smp_processor_id()] = apicid;
BUG_ON(!cmsk);
if (cmsk)
goto update;
cluster = apicid >> 16;
for_each_online_cpu(cpu) {
cmsk = per_cpu(cluster_masks, cpu);
/* Matching cluster found. Link and update it. */
if (cmsk && cmsk->clusterid == cluster)
goto update;
}
cmsk = cluster_hotplug_mask;
cmsk->clusterid = cluster;
cluster_hotplug_mask = NULL;
update:
this_cpu_write(cluster_masks, cmsk);
cpumask_set_cpu(smp_processor_id(), &cmsk->mask);
cpumask_set_cpu(smp_processor_id(), cmsk);
}
static int alloc_clustermask(unsigned int cpu, int node)
/*
 * Boot-time shortcut: publish @cmsk in the cluster_masks slot of every other
 * present CPU that belongs to @cluster, so that those CPUs do not each have
 * to walk the other CPUs to find the already existing mask.
 *
 * @cmsk:    cluster mask to publish
 * @cpu:     CPU to skip; its own slot is not written here
 * @cluster: cluster number, matched against apic_cluster() of each APIC ID
 */
static void prefill_clustermask(struct cpumask *cmsk, unsigned int cpu, u32 cluster)
{
	int sibling;

	for_each_present_cpu(sibling) {
		struct cpumask **slot = &per_cpu(cluster_masks, sibling);
		u32 id = apic->cpu_present_to_apicid(sibling);

		/* No usable APIC ID for this CPU. */
		if (id == BAD_APICID)
			continue;

		/* The CPU named by the caller is left alone. */
		if (sibling == cpu)
			continue;

		/* Member of a different cluster. */
		if (apic_cluster(id) != cluster)
			continue;

		/* Slot already points at this very mask: warn once, move on. */
		if (WARN_ON_ONCE(*slot == cmsk))
			continue;

		/* Any other non-NULL mask in the slot is treated as fatal. */
		BUG_ON(*slot);

		*slot = cmsk;
	}
}
static int alloc_clustermask(unsigned int cpu, u32 cluster, int node)
{
struct cpumask *cmsk = NULL;
unsigned int cpu_i;
/*
* At boot time, the CPU present mask is stable. The cluster mask is
* allocated for the first CPU in the cluster and propagated to all
* present siblings in the cluster. If the cluster mask is already set
* on entry to this function for a given CPU, there is nothing to do.
*/
if (per_cpu(cluster_masks, cpu))
return 0;
/*
* If a hotplug spare mask exists, check whether it's on the right
* node. If not, free it and allocate a new one.
*/
if (cluster_hotplug_mask) {
if (cluster_hotplug_mask->node == node)
return 0;
kfree(cluster_hotplug_mask);
}
cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask),
GFP_KERNEL, node);
if (!cluster_hotplug_mask)
if (system_state < SYSTEM_RUNNING)
goto alloc;
/*
* On post boot hotplug for a CPU which was not present at boot time,
* iterate over all possible CPUs (even those which are not present
* any more) to find any existing cluster mask.
*/
for_each_possible_cpu(cpu_i) {
u32 apicid = apic->cpu_present_to_apicid(cpu_i);
if (apicid != BAD_APICID && apic_cluster(apicid) == cluster) {
cmsk = per_cpu(cluster_masks, cpu_i);
/*
* If the cluster is already initialized, just store
* the mask and return. There's no need to propagate.
*/
if (cmsk) {
per_cpu(cluster_masks, cpu) = cmsk;
return 0;
}
}
}
/*
* No CPU in the cluster has ever been initialized, so fall through to
* the boot time code which will also populate the cluster mask for any
* other CPU in the cluster which is (now) present.
*/
alloc:
cmsk = kzalloc_node(sizeof(*cmsk), GFP_KERNEL, node);
if (!cmsk)
return -ENOMEM;
cluster_hotplug_mask->node = node;
per_cpu(cluster_masks, cpu) = cmsk;
prefill_clustermask(cmsk, cpu, cluster);
return 0;
}
static int x2apic_prepare_cpu(unsigned int cpu)
{
if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0)
u32 phys_apicid = apic->cpu_present_to_apicid(cpu);
u32 cluster = apic_cluster(phys_apicid);
u32 logical_apicid = (cluster << 16) | (1 << (phys_apicid & 0xf));
x86_cpu_to_logical_apicid[cpu] = logical_apicid;
if (alloc_clustermask(cpu, cluster, cpu_to_node(cpu)) < 0)
return -ENOMEM;
if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL))
return -ENOMEM;
@@ -162,10 +200,10 @@ static int x2apic_prepare_cpu(unsigned int cpu)
static int x2apic_dead_cpu(unsigned int dead_cpu)
{
struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu);
struct cpumask *cmsk = per_cpu(cluster_masks, dead_cpu);
if (cmsk)
cpumask_clear_cpu(dead_cpu, &cmsk->mask);
cpumask_clear_cpu(dead_cpu, cmsk);
free_cpumask_var(per_cpu(ipi_mask, dead_cpu));
return 0;
}
+1
View File
@@ -115,6 +115,7 @@ static void __used common(void)
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
OFFSET(X86_current_task, pcpu_hot, current_task);
#ifdef CONFIG_CALL_DEPTH_TRACKING
OFFSET(X86_call_depth, pcpu_hot, call_depth);
#endif
+40 -32
View File
@@ -61,23 +61,15 @@ SYM_CODE_START_NOALIGN(startup_64)
* tables and then reload them.
*/
/* Set up the stack for verify_cpu(), similar to initial_stack below */
leaq (__end_init_task - FRAME_SIZE)(%rip), %rsp
/* Set up the stack for verify_cpu() */
leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp
leaq _text(%rip), %rdi
/*
* initial_gs points to initial fixed_percpu_data struct with storage for
* the stack protector canary. Global pointer fixups are needed at this
* stage, so apply them as is done in fixup_pointer(), and initialize %gs
* such that the canary can be accessed at %gs:40 for subsequent C calls.
*/
/* Setup GSBASE to allow stack canary access for C code */
movl $MSR_GS_BASE, %ecx
movq initial_gs(%rip), %rax
movq $_text, %rdx
subq %rdx, %rax
addq %rdi, %rax
movq %rax, %rdx
leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
movl %edx, %eax
shrq $32, %rdx
wrmsr
@@ -241,13 +233,36 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR // above
#ifdef CONFIG_SMP
movl smpboot_control(%rip), %ecx
/* Get the per cpu offset for the given CPU# which is in ECX */
movq __per_cpu_offset(,%rcx,8), %rdx
#else
xorl %edx, %edx /* zero-extended to clear all of RDX */
#endif /* CONFIG_SMP */
/*
* Setup a boot time stack - Any secondary CPU will have lost its stack
* by now because the cr3-switch above unmaps the real-mode stack.
*
* RDX contains the per-cpu offset
*/
movq pcpu_hot + X86_current_task(%rdx), %rax
movq TASK_threadsp(%rax), %rsp
/*
* We must switch to a new descriptor in kernel space for the GDT
* because soon the kernel won't have access anymore to the userspace
* addresses where we're currently running on. We have to do that here
* because in 32bit we couldn't load a 64bit linear address.
*/
lgdt early_gdt_descr(%rip)
subq $16, %rsp
movw $(GDT_SIZE-1), (%rsp)
leaq gdt_page(%rdx), %rax
movq %rax, 2(%rsp)
lgdt (%rsp)
addq $16, %rsp
/* set up data segments */
xorl %eax,%eax
@@ -271,16 +286,13 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
* the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
movl initial_gs(%rip),%eax
movl initial_gs+4(%rip),%edx
#ifndef CONFIG_SMP
leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
#endif
movl %edx, %eax
shrq $32, %rdx
wrmsr
/*
* Setup a boot time stack - Any secondary CPU will have lost its stack
* by now because the cr3-switch above unmaps the real-mode stack
*/
movq initial_stack(%rip), %rsp
/* Setup and Load IDT */
pushq %rsi
call early_setup_idt
@@ -372,7 +384,11 @@ SYM_CODE_END(secondary_startup_64)
SYM_CODE_START(start_cpu0)
ANNOTATE_NOENDBR
UNWIND_HINT_EMPTY
movq initial_stack(%rip), %rsp
/* Find the idle task stack */
movq PER_CPU_VAR(pcpu_hot) + X86_current_task, %rcx
movq TASK_threadsp(%rcx), %rsp
jmp .Ljump_to_C_code
SYM_CODE_END(start_cpu0)
#endif
@@ -416,16 +432,9 @@ SYM_CODE_END(vc_boot_ghcb)
__REFDATA
.balign 8
SYM_DATA(initial_code, .quad x86_64_start_kernel)
SYM_DATA(initial_gs, .quad INIT_PER_CPU_VAR(fixed_percpu_data))
#ifdef CONFIG_AMD_MEM_ENCRYPT
SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb)
#endif
/*
* The FRAME_SIZE gap is a convention which helps the in-kernel unwinder
* reliably detect the end of the stack.
*/
SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE)
__FINITDATA
__INIT
@@ -657,8 +666,7 @@ SYM_DATA_END(level1_fixmap_pgt)
.data
.align 16
SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1)
SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page))
SYM_DATA(smpboot_control, .long 0)
.align 16
/* This must match the first entry in level2_kernel_pgt */
+19 -11
View File
@@ -121,17 +121,20 @@ int arch_update_cpu_topology(void)
return retval;
}
static unsigned int smpboot_warm_reset_vector_count;
static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
{
unsigned long flags;
spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(0xa, 0xf);
if (!smpboot_warm_reset_vector_count++) {
CMOS_WRITE(0xa, 0xf);
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = start_eip >> 4;
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = start_eip & 0xf;
}
spin_unlock_irqrestore(&rtc_lock, flags);
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
start_eip >> 4;
*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
start_eip & 0xf;
}
static inline void smpboot_restore_warm_reset_vector(void)
@@ -143,10 +146,12 @@ static inline void smpboot_restore_warm_reset_vector(void)
* to default values.
*/
spin_lock_irqsave(&rtc_lock, flags);
CMOS_WRITE(0, 0xf);
if (!--smpboot_warm_reset_vector_count) {
CMOS_WRITE(0, 0xf);
*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
spin_unlock_irqrestore(&rtc_lock, flags);
*((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0;
}
/*
@@ -1059,8 +1064,6 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle);
#else
initial_gs = per_cpu_offset(cpu);
#endif
return 0;
}
@@ -1086,9 +1089,14 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
start_ip = real_mode_header->trampoline_start64;
#endif
idle->thread.sp = (unsigned long)task_pt_regs(idle);
early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
initial_code = (unsigned long)start_secondary;
initial_stack = idle->thread.sp;
if (IS_ENABLED(CONFIG_X86_32)) {
early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
initial_stack = idle->thread.sp;
} else {
smpboot_control = cpu;
}
/* Enable the espfix hack for this CPU */
init_espfix_ap(cpu);
+1 -1
View File
@@ -49,7 +49,7 @@ SYM_CODE_START(startup_xen)
ANNOTATE_NOENDBR
cld
mov initial_stack(%rip), %rsp
leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp
/* Set up %gs.
*
+11 -6
View File
@@ -4,6 +4,7 @@
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/alarmtimer.h>
#include <linux/timerqueue.h>
@@ -62,16 +63,18 @@ static inline int clockid_to_fd(const clockid_t clk)
* cpu_timer - Posix CPU timer representation for k_itimer
* @node: timerqueue node to queue in the task/sig
* @head: timerqueue head on which this timer is queued
* @task: Pointer to target task
* @pid: Pointer to target task PID
* @elist: List head for the expiry list
* @firing: Timer is currently firing
* @handling: Pointer to the task which handles expiry
*/
struct cpu_timer {
struct timerqueue_node node;
struct timerqueue_head *head;
struct pid *pid;
struct list_head elist;
int firing;
struct timerqueue_node node;
struct timerqueue_head *head;
struct pid *pid;
struct list_head elist;
int firing;
struct task_struct __rcu *handling;
};
static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
@@ -135,10 +138,12 @@ struct posix_cputimers {
/**
 * struct posix_cputimers_work - Container for task work based posix CPU timer expiry
 * @work:	The task work to be scheduled
 * @mutex:	Mutex held around expiry in context of this task work; code
 *		that needs to wait for a running expiry can lock and unlock it
 * @scheduled:  @work has been scheduled already, no further processing
 */
struct posix_cputimers_work {
struct callback_head work;
struct mutex mutex;
unsigned int scheduled;
};
+18 -3
View File
@@ -1003,8 +1003,7 @@ static void complete_signal(int sig, struct task_struct *p, enum pid_type type)
/*
* Now find a thread we can wake up to take the signal off the queue.
*
* If the main thread wants the signal, it gets first crack.
* Probably the least surprising to the average bear.
* Try the suggested task first (may or may not be the main thread).
*/
if (wants_signal(sig, p))
t = p;
@@ -1970,8 +1969,24 @@ int send_sigqueue(struct sigqueue *q, struct pid *pid, enum pid_type type)
ret = -1;
rcu_read_lock();
/*
* This function is used by POSIX timers to deliver a timer signal.
* Where type is PIDTYPE_PID (such as for timers with SIGEV_THREAD_ID
* set), the signal must be delivered to the specific thread (queues
* into t->pending).
*
* Where type is not PIDTYPE_PID, signals must be delivered to the
* process. In this case, prefer to deliver to current if it is in
* the same thread group as the target process, which avoids
* unnecessarily waking up a potentially idle task.
*/
t = pid_task(pid, type);
if (!t || !likely(lock_task_sighand(t, &flags)))
if (!t)
goto ret;
if (type != PIDTYPE_PID && same_thread_group(t, current))
t = current;
if (!likely(lock_task_sighand(t, &flags)))
goto ret;
ret = 1; /* the signal is ignored */
+67 -14
View File
@@ -846,6 +846,8 @@ static u64 collect_timerqueue(struct timerqueue_head *head,
return expires;
ctmr->firing = 1;
/* See posix_cpu_timer_wait_running() */
rcu_assign_pointer(ctmr->handling, current);
cpu_timer_dequeue(ctmr);
list_add_tail(&ctmr->elist, firing);
}
@@ -1161,7 +1163,49 @@ static void handle_posix_cpu_timers(struct task_struct *tsk);
#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
/*
 * Task work callback for posix CPU timer expiry.
 *
 * @work is the callback_head embedded in a struct posix_cputimers_work; the
 * container is recovered with container_of(). Expiry for the current task
 * runs with the container's mutex held, so posix_cpu_timer_wait_running()
 * can block on that mutex until expiry has finished.
 */
static void posix_cpu_timers_work(struct callback_head *work)
{
struct posix_cputimers_work *cw = container_of(work, typeof(*cw), work);
/* Held for the whole expiry run; waiters serialize on it. */
mutex_lock(&cw->mutex);
handle_posix_cpu_timers(current);
mutex_unlock(&cw->mutex);
}
/*
 * Invoked from the posix-timer core when a cancel operation failed because
 * the timer is marked firing. The caller holds rcu_read_lock(), which
 * protects the timer and the task which is expiring it from being freed.
 *
 * @timr: timer whose expiry is in progress. Its it.cpu.handling pointer is
 *        set to the expiring task in collect_timerqueue() and reset to NULL
 *        in handle_posix_cpu_timers() once the timer has been processed.
 *
 * If a handling task is still recorded, wait for it by taking and releasing
 * its posix_cputimers_work.mutex. The RCU read lock is dropped while waiting
 * and retaken before returning, so RCU protection is not continuous across
 * this call.
 */
static void posix_cpu_timer_wait_running(struct k_itimer *timr)
{
struct task_struct *tsk = rcu_dereference(timr->it.cpu.handling);
/* Has the handling task completed expiry already? */
if (!tsk)
return;
/* Ensure that the task cannot go away once RCU protection is dropped */
get_task_struct(tsk);
/* Now drop the RCU protection so the mutex can be locked */
rcu_read_unlock();
/* Wait on the expiry mutex */
mutex_lock(&tsk->posix_cputimers_work.mutex);
/* Release it immediately again; acquiring it was only a way to wait. */
mutex_unlock(&tsk->posix_cputimers_work.mutex);
/* Drop the task reference. */
put_task_struct(tsk);
/* Relock RCU so the callsite is balanced */
rcu_read_lock();
}
/*
 * Wrapper around posix_cpu_timer_wait_running() for do_cpu_nanosleep().
 *
 * Releases @timr->it_lock first and takes it again before returning, so it
 * expects to be entered with that lock held. In between it provides the
 * rcu_read_lock() section that posix_cpu_timer_wait_running() requires.
 */
static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr)
{
/* Ensure that timr->it.cpu.handling task cannot go away */
rcu_read_lock();
/* The lock is released before waiting and retaken below. */
spin_unlock_irq(&timr->it_lock);
posix_cpu_timer_wait_running(timr);
rcu_read_unlock();
/* @timr is on stack and is valid */
spin_lock_irq(&timr->it_lock);
}
/*
@@ -1177,6 +1221,7 @@ void clear_posix_cputimers_work(struct task_struct *p)
sizeof(p->posix_cputimers_work.work));
init_task_work(&p->posix_cputimers_work.work,
posix_cpu_timers_work);
mutex_init(&p->posix_cputimers_work.mutex);
p->posix_cputimers_work.scheduled = false;
}
@@ -1255,6 +1300,18 @@ static inline void __run_posix_cpu_timers(struct task_struct *tsk)
lockdep_posixtimer_exit();
}
/*
 * Variant that has no expiry mutex to wait on: it only issues cpu_relax()
 * and returns. @timr is unused.
 *
 * NOTE(review): presumably the !CONFIG_POSIX_CPU_TIMERS_TASK_WORK build; the
 * enclosing preprocessor branch is outside this view - confirm.
 */
static void posix_cpu_timer_wait_running(struct k_itimer *timr)
{
cpu_relax();
}
/*
 * Variant for do_cpu_nanosleep() that has no expiry mutex to wait on:
 * release @timr->it_lock, issue cpu_relax(), and take the lock again. It
 * expects to be entered with @timr->it_lock held.
 *
 * NOTE(review): presumably the !CONFIG_POSIX_CPU_TIMERS_TASK_WORK build; the
 * enclosing preprocessor branch is outside this view - confirm.
 */
static void posix_cpu_timer_wait_running_nsleep(struct k_itimer *timr)
{
spin_unlock_irq(&timr->it_lock);
cpu_relax();
spin_lock_irq(&timr->it_lock);
}
static inline bool posix_cpu_timers_work_scheduled(struct task_struct *tsk)
{
return false;
@@ -1363,6 +1420,8 @@ static void handle_posix_cpu_timers(struct task_struct *tsk)
*/
if (likely(cpu_firing >= 0))
cpu_timer_fire(timer);
/* See posix_cpu_timer_wait_running() */
rcu_assign_pointer(timer->it.cpu.handling, NULL);
spin_unlock(&timer->it_lock);
}
}
@@ -1497,23 +1556,16 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
expires = cpu_timer_getexpires(&timer.it.cpu);
error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
if (!error) {
/*
* Timer is now unarmed, deletion can not fail.
*/
/* Timer is now unarmed, deletion can not fail. */
posix_cpu_timer_del(&timer);
} else {
while (error == TIMER_RETRY) {
posix_cpu_timer_wait_running_nsleep(&timer);
error = posix_cpu_timer_del(&timer);
}
}
spin_unlock_irq(&timer.it_lock);
while (error == TIMER_RETRY) {
/*
* We need to handle case when timer was or is in the
* middle of firing. In other cases we already freed
* resources.
*/
spin_lock_irq(&timer.it_lock);
error = posix_cpu_timer_del(&timer);
spin_unlock_irq(&timer.it_lock);
}
spin_unlock_irq(&timer.it_lock);
if ((it.it_value.tv_sec | it.it_value.tv_nsec) == 0) {
/*
@@ -1623,6 +1675,7 @@ const struct k_clock clock_posix_cpu = {
.timer_del = posix_cpu_timer_del,
.timer_get = posix_cpu_timer_get,
.timer_rearm = posix_cpu_timer_rearm,
.timer_wait_running = posix_cpu_timer_wait_running,
};
const struct k_clock clock_process = {
+4
View File
@@ -846,6 +846,10 @@ static struct k_itimer *timer_wait_running(struct k_itimer *timer,
rcu_read_lock();
unlock_timer(timer, *flags);
/*
* kc->timer_wait_running() might drop RCU lock. So @timer
* cannot be touched anymore after the function returns!
*/
if (!WARN_ON_ONCE(!kc->timer_wait_running))
kc->timer_wait_running(timer);
+11 -1
View File
@@ -220,9 +220,19 @@ static void tick_setup_device(struct tick_device *td,
* this cpu:
*/
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
ktime_t next_p;
u32 rem;
tick_do_timer_cpu = cpu;
tick_next_period = ktime_get();
next_p = ktime_get();
div_u64_rem(next_p, TICK_NSEC, &rem);
if (rem) {
next_p -= rem;
next_p += TICK_NSEC;
}
tick_next_period = next_p;
#ifdef CONFIG_NO_HZ_FULL
/*
* The boot CPU may be nohz_full, in which case set
+63 -72
View File
@@ -650,43 +650,67 @@ static void tick_nohz_update_jiffies(ktime_t now)
touch_softlockup_watchdog_sched();
}
/*
* Updates the per-CPU time idle statistics counters
*/
static void
update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_update_time)
static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
{
ktime_t delta;
if (ts->idle_active) {
delta = ktime_sub(now, ts->idle_entrytime);
if (nr_iowait_cpu(cpu) > 0)
ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
else
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
ts->idle_entrytime = now;
}
if (WARN_ON_ONCE(!ts->idle_active))
return;
if (last_update_time)
*last_update_time = ktime_to_us(now);
delta = ktime_sub(now, ts->idle_entrytime);
}
write_seqcount_begin(&ts->idle_sleeptime_seq);
if (nr_iowait_cpu(smp_processor_id()) > 0)
ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
else
ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
{
update_ts_time_stats(smp_processor_id(), ts, now, NULL);
ts->idle_entrytime = now;
ts->idle_active = 0;
write_seqcount_end(&ts->idle_sleeptime_seq);
sched_clock_idle_wakeup_event();
}
/*
 * Mark @ts as idle: record the idle entry time from ktime_get() and set
 * idle_active. Both stores sit inside the idle_sleeptime_seq write section,
 * so a reader that brackets its accesses with read_seqcount_begin() /
 * read_seqcount_retry() on the same seqcount retries if it raced with this
 * update. Afterwards sched_clock_idle_sleep_event() is called.
 */
static void tick_nohz_start_idle(struct tick_sched *ts)
{
write_seqcount_begin(&ts->idle_sleeptime_seq);
ts->idle_entrytime = ktime_get();
ts->idle_active = 1;
write_seqcount_end(&ts->idle_sleeptime_seq);
sched_clock_idle_sleep_event();
}
/*
 * Common helper behind get_cpu_idle_time_us() and get_cpu_iowait_time_us().
 *
 * @ts:               per-CPU tick_sched to sample
 * @sleeptime:        accumulated counter to report (the callers pass
 *                    &ts->idle_sleeptime or &ts->iowait_sleeptime)
 * @compute_delta:    if true and @ts is currently idle, add the time elapsed
 *                    since ts->idle_entrytime to the reported value
 * @last_update_time: optional; when non-NULL it receives the sampling time
 *                    in microseconds
 *
 * Nothing in @ts is modified. The counter is sampled inside a seqcount read
 * loop on ts->idle_sleeptime_seq and re-read if the sequence changed.
 *
 * Return: the selected sleep time in microseconds, or -1 (converted to u64)
 * when tick_nohz_active is not set.
 */
static u64 get_cpu_sleep_time_us(struct tick_sched *ts, ktime_t *sleeptime,
				 bool compute_delta, u64 *last_update_time)
{
	ktime_t now, total;
	unsigned int seq;

	if (!tick_nohz_active)
		return -1;

	now = ktime_get();
	if (last_update_time)
		*last_update_time = ktime_to_us(now);

	for (;;) {
		seq = read_seqcount_begin(&ts->idle_sleeptime_seq);

		/* Include the still running idle period only when asked to. */
		if (ts->idle_active && compute_delta)
			total = ktime_add(*sleeptime,
					  ktime_sub(now, ts->idle_entrytime));
		else
			total = *sleeptime;

		/* Consistent snapshot obtained, stop retrying. */
		if (!read_seqcount_retry(&ts->idle_sleeptime_seq, seq))
			break;
	}

	return ktime_to_us(total);
}
/**
* get_cpu_idle_time_us - get the total idle time of a CPU
* @cpu: CPU number to query
@@ -694,7 +718,10 @@ static void tick_nohz_start_idle(struct tick_sched *ts)
* counters if NULL.
*
* Return the cumulative idle time (since boot) for a given
* CPU, in microseconds.
* CPU, in microseconds. Note this is partially broken due to
* the counter of iowait tasks that can be remotely updated without
* any synchronization. Therefore it is possible to observe backward
* values within two consecutive reads.
*
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
@@ -704,27 +731,9 @@ static void tick_nohz_start_idle(struct tick_sched *ts)
u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
ktime_t now, idle;
if (!tick_nohz_active)
return -1;
now = ktime_get();
if (last_update_time) {
update_ts_time_stats(cpu, ts, now, last_update_time);
idle = ts->idle_sleeptime;
} else {
if (ts->idle_active && !nr_iowait_cpu(cpu)) {
ktime_t delta = ktime_sub(now, ts->idle_entrytime);
idle = ktime_add(ts->idle_sleeptime, delta);
} else {
idle = ts->idle_sleeptime;
}
}
return ktime_to_us(idle);
return get_cpu_sleep_time_us(ts, &ts->idle_sleeptime,
!nr_iowait_cpu(cpu), last_update_time);
}
EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
@@ -735,7 +744,10 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
* counters if NULL.
*
* Return the cumulative iowait time (since boot) for a given
* CPU, in microseconds.
* CPU, in microseconds. Note this is partially broken due to
* the counter of iowait tasks that can be remotely updated without
* any synchronization. Therefore it is possible to observe backward
* values within two consecutive reads.
*
* This time is measured via accounting rather than sampling,
* and is as accurate as ktime_get() is.
@@ -745,26 +757,9 @@ EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
{
struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
ktime_t now, iowait;
if (!tick_nohz_active)
return -1;
now = ktime_get();
if (last_update_time) {
update_ts_time_stats(cpu, ts, now, last_update_time);
iowait = ts->iowait_sleeptime;
} else {
if (ts->idle_active && nr_iowait_cpu(cpu) > 0) {
ktime_t delta = ktime_sub(now, ts->idle_entrytime);
iowait = ktime_add(ts->iowait_sleeptime, delta);
} else {
iowait = ts->iowait_sleeptime;
}
}
return ktime_to_us(iowait);
return get_cpu_sleep_time_us(ts, &ts->iowait_sleeptime,
nr_iowait_cpu(cpu), last_update_time);
}
EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
@@ -1097,10 +1092,16 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return true;
}
static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
/**
* tick_nohz_idle_stop_tick - stop the idle tick from the idle task
*
* When the next event is more than a tick into the future, stop the idle tick
*/
void tick_nohz_idle_stop_tick(void)
{
ktime_t expires;
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched);
int cpu = smp_processor_id();
ktime_t expires;
/*
* If tick_nohz_get_sleep_length() ran tick_nohz_next_event(), the
@@ -1132,16 +1133,6 @@ static void __tick_nohz_idle_stop_tick(struct tick_sched *ts)
}
}
/**
* tick_nohz_idle_stop_tick - stop the idle tick from the idle task
*
* When the next event is more than a tick into the future, stop the idle tick
*/
void tick_nohz_idle_stop_tick(void)
{
__tick_nohz_idle_stop_tick(this_cpu_ptr(&tick_cpu_sched));
}
void tick_nohz_idle_retain_tick(void)
{
tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
+42 -25
View File
@@ -22,65 +22,82 @@ enum tick_nohz_mode {
/**
* struct tick_sched - sched tick emulation and no idle tick control/stats
* @sched_timer: hrtimer to schedule the periodic tick in high
* resolution mode
* @check_clocks: Notification mechanism about clocksource changes
* @nohz_mode: Mode - one state of tick_nohz_mode
*
* @inidle: Indicator that the CPU is in the tick idle mode
* @tick_stopped: Indicator that the idle tick has been stopped
* @idle_active: Indicator that the CPU is actively in the tick idle mode;
* it is reset during irq handling phases.
* @do_timer_lst: CPU was the last one doing do_timer before going idle
* @do_timer_last: CPU was the last one doing do_timer before going idle
* @got_idle_tick: Tick timer function has run with @inidle set
* @stalled_jiffies: Number of stalled jiffies detected across ticks
* @last_tick_jiffies: Value of jiffies seen on last tick
* @sched_timer: hrtimer to schedule the periodic tick in high
* resolution mode
* @last_tick: Store the last tick expiry time when the tick
* timer is modified for nohz sleeps. This is necessary
* to resume the tick timer operation in the timeline
* when the CPU returns from nohz sleep.
* @next_tick: Next tick to be fired when in dynticks mode.
* @idle_jiffies: jiffies at the entry to idle for idle time accounting
* @idle_waketime: Time when the idle was interrupted
* @idle_entrytime: Time when the idle call was entered
* @nohz_mode: Mode - one state of tick_nohz_mode
* @last_jiffies: Base jiffies snapshot when next event was last computed
* @timer_expires_base: Base time clock monotonic for @timer_expires
* @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)
* @next_timer: Expiry time of next expiring timer for debugging purpose only
* @idle_expires: Next tick in idle, for debugging purpose only
* @idle_calls: Total number of idle calls
* @idle_sleeps: Number of idle calls, where the sched tick was stopped
* @idle_entrytime: Time when the idle call was entered
* @idle_waketime: Time when the idle was interrupted
* @idle_exittime: Time when the idle state was left
* @idle_sleeptime: Sum of the time slept in idle with sched tick stopped
* @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding
* @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)
* @timer_expires_base: Base time clock monotonic for @timer_expires
* @next_timer: Expiry time of next expiring timer for debugging purpose only
* @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick
* @last_tick_jiffies: Value of jiffies seen on last tick
* @stalled_jiffies: Number of stalled jiffies detected across ticks
* @check_clocks: Notification mechanism about clocksource changes
*/
struct tick_sched {
struct hrtimer sched_timer;
unsigned long check_clocks;
enum tick_nohz_mode nohz_mode;
/* Common flags */
unsigned int inidle : 1;
unsigned int tick_stopped : 1;
unsigned int idle_active : 1;
unsigned int do_timer_last : 1;
unsigned int got_idle_tick : 1;
/* Tick handling: jiffies stall check */
unsigned int stalled_jiffies;
unsigned long last_tick_jiffies;
/* Tick handling */
struct hrtimer sched_timer;
ktime_t last_tick;
ktime_t next_tick;
unsigned long idle_jiffies;
ktime_t idle_waketime;
/* Idle entry */
seqcount_t idle_sleeptime_seq;
ktime_t idle_entrytime;
/* Tick stop */
enum tick_nohz_mode nohz_mode;
unsigned long last_jiffies;
u64 timer_expires_base;
u64 timer_expires;
u64 next_timer;
ktime_t idle_expires;
unsigned long idle_calls;
unsigned long idle_sleeps;
ktime_t idle_entrytime;
ktime_t idle_waketime;
/* Idle exit */
ktime_t idle_exittime;
ktime_t idle_sleeptime;
ktime_t iowait_sleeptime;
unsigned long last_jiffies;
u64 timer_expires;
u64 timer_expires_base;
u64 next_timer;
ktime_t idle_expires;
/* Full dynticks handling */
atomic_t tick_dep_mask;
unsigned long last_tick_jiffies;
unsigned int stalled_jiffies;
/* Clocksource changes */
unsigned long check_clocks;
};
extern struct tick_sched *tick_get_tick_sched(int cpu);
+4 -9
View File
@@ -5,18 +5,13 @@ GENERIC_VDSO_DIR := $(dir $(GENERIC_VDSO_MK_PATH))
c-gettimeofday-$(CONFIG_GENERIC_GETTIMEOFDAY) := $(addprefix $(GENERIC_VDSO_DIR), gettimeofday.c)
# This cmd checks that the vdso library does not contain absolute relocation
# This cmd checks that the vdso library does not contain dynamic relocations.
# It has to be called after the linking of the vdso library and requires it
# as a parameter.
#
# $(ARCH_REL_TYPE_ABS) is defined in the arch specific makefile and corresponds
# to the absolute relocation types printed by "objdump -R" and accepted by the
# dynamic linker.
ifndef ARCH_REL_TYPE_ABS
$(error ARCH_REL_TYPE_ABS is not set)
endif
# As a workaround for some GNU ld ports which produce unneeded R_*_NONE
# dynamic relocations, ignore R_*_NONE.
quiet_cmd_vdso_check = VDSOCHK $@
cmd_vdso_check = if $(OBJDUMP) -R $@ | grep -E -h "$(ARCH_REL_TYPE_ABS)"; \
cmd_vdso_check = if $(READELF) -rW $@ | grep -v _NONE | grep -q " R_\w*_"; \
then (echo >&2 "$@: dynamic relocations are not supported"; \
rm -f $@; /bin/false); fi
+19 -6
View File
@@ -13,7 +13,9 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
// Test that values in /proc/uptime increment monotonically.
// Test that boottime value in /proc/uptime and CLOCK_BOOTTIME increment
// monotonically. We don't test idle time monotonicity due to broken iowait
// task counting, cf: comment above get_cpu_idle_time_us()
#undef NDEBUG
#include <assert.h>
#include <stdint.h>
@@ -25,20 +27,31 @@
int main(void)
{
uint64_t start, u0, u1, i0, i1;
uint64_t start, u0, u1, c0, c1;
int fd;
fd = open("/proc/uptime", O_RDONLY);
assert(fd >= 0);
proc_uptime(fd, &u0, &i0);
u0 = proc_uptime(fd);
start = u0;
c0 = clock_boottime();
do {
proc_uptime(fd, &u1, &i1);
u1 = proc_uptime(fd);
c1 = clock_boottime();
/* Is /proc/uptime monotonic ? */
assert(u1 >= u0);
assert(i1 >= i0);
/* Is CLOCK_BOOTTIME monotonic ? */
assert(c1 >= c0);
/* CLOCK_BOOTTIME was sampled after /proc/uptime: it must not be behind it */
assert(c0 >= u0);
u0 = u1;
i0 = i1;
c0 = c1;
} while (u1 - start < 100);
return 0;
+20 -7
View File
@@ -13,8 +13,10 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
// Test that values in /proc/uptime increment monotonically
// while shifting across CPUs.
// Test that boottime value in /proc/uptime and CLOCK_BOOTTIME increment
// monotonically while shifting across CPUs. We don't test idle time
// monotonicity due to broken iowait task counting, cf: comment above
// get_cpu_idle_time_us()
#undef NDEBUG
#include <assert.h>
#include <errno.h>
@@ -42,10 +44,10 @@ static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned lo
int main(void)
{
uint64_t u0, u1, c0, c1;
unsigned int len;
unsigned long *m;
unsigned int cpu;
uint64_t u0, u1, i0, i1;
int fd;
/* find out "nr_cpu_ids" */
@@ -60,7 +62,9 @@ int main(void)
fd = open("/proc/uptime", O_RDONLY);
assert(fd >= 0);
proc_uptime(fd, &u0, &i0);
u0 = proc_uptime(fd);
c0 = clock_boottime();
for (cpu = 0; cpu < len * 8; cpu++) {
memset(m, 0, len);
m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long)));
@@ -68,11 +72,20 @@ int main(void)
/* CPU might not exist, ignore error */
sys_sched_setaffinity(0, len, m);
proc_uptime(fd, &u1, &i1);
u1 = proc_uptime(fd);
c1 = clock_boottime();
/* Is /proc/uptime monotonic ? */
assert(u1 >= u0);
assert(i1 >= i0);
/* Is CLOCK_BOOTTIME monotonic ? */
assert(c1 >= c0);
/* CLOCK_BOOTTIME was sampled after /proc/uptime: it must not be behind it */
assert(c0 >= u0);
u0 = u1;
i0 = i1;
c0 = c1;
}
return 0;
+14 -14
View File
@@ -19,10 +19,22 @@
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <time.h>
#include "proc.h"
static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
static uint64_t clock_boottime(void)
{
struct timespec ts;
int err;
err = clock_gettime(CLOCK_BOOTTIME, &ts);
assert(err >= 0);
return (ts.tv_sec * 100) + (ts.tv_nsec / 10000000);
}
static uint64_t proc_uptime(int fd)
{
uint64_t val1, val2;
char buf[64], *p;
@@ -43,18 +55,6 @@ static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
assert(p[3] == ' ');
val2 = (p[1] - '0') * 10 + p[2] - '0';
*uptime = val1 * 100 + val2;
p += 4;
val1 = xstrtoull(p, &p);
assert(p[0] == '.');
assert('0' <= p[1] && p[1] <= '9');
assert('0' <= p[2] && p[2] <= '9');
assert(p[3] == '\n');
val2 = (p[1] - '0') * 10 + p[2] - '0';
*idle = val1 * 100 + val2;
assert(p + 4 == buf + rv);
return val1 * 100 + val2;
}
@@ -188,6 +188,80 @@ static int check_timer_create(int which)
return 0;
}
/*
 * Number of threads still expected to run distribution_handler() for the
 * first time. Polled by the spin loops and decremented from the handler.
 */
int remain;
/*
 * Thread-local flag set the first time distribution_handler() runs in a
 * thread, so that each thread decrements remain at most once.
 */
__thread int got_signal;
/*
 * Thread body: busy-loop until the shared counter "remain" reads zero,
 * then exit. @arg is unused. Always returns NULL.
 */
static void *distribution_thread(void *arg)
{
	for (;;) {
		if (!__atomic_load_n(&remain, __ATOMIC_RELAXED))
			break;
	}

	return NULL;
}
/*
 * Signal handler: the first time it runs in a given thread it sets that
 * thread's got_signal flag and drops "remain" by one; later invocations in
 * the same thread do nothing more. @nr is unused.
 */
static void distribution_handler(int nr)
{
	int already_seen = __atomic_exchange_n(&got_signal, 1, __ATOMIC_RELAXED);

	if (already_seen)
		return;

	__atomic_fetch_sub(&remain, 1, __ATOMIC_RELAXED);
}
/*
 * Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID
 * timer signals. This primarily tests that the kernel does not favour any one.
 *
 * A periodic timer with a 1 ms initial expiry and a 1 ms interval is armed
 * on the process CPU-time clock, then 10 threads are started that spin in
 * distribution_thread(). The calling thread spins as well, until "remain"
 * (worker threads + this thread) has been brought down to zero by
 * distribution_handler().
 *
 * Return: 0 on success, -1 if any setup or teardown step fails.
 */
static int check_timer_distribution(void)
{
	int err, i;
	timer_t id;
	const int nthreads = 10;
	pthread_t threads[nthreads];
	struct itimerspec val = {
		.it_value.tv_sec = 0,
		.it_value.tv_nsec = 1000 * 1000,
		.it_interval.tv_sec = 0,
		.it_interval.tv_nsec = 1000 * 1000,
	};

	printf("Check timer_create() per process signal distribution... ");
	fflush(stdout);

	remain = nthreads + 1;  /* worker threads + this thread */
	signal(SIGALRM, distribution_handler);

	/* NULL sigevent: the handler installed above is for SIGALRM. */
	err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
	if (err < 0) {
		perror("Can't create timer\n");
		return -1;
	}

	err = timer_settime(id, 0, &val, NULL);
	if (err < 0) {
		perror("Can't set timer\n");
		return -1;
	}

	for (i = 0; i < nthreads; i++) {
		err = pthread_create(&threads[i], NULL, distribution_thread, NULL);
		if (err) {
			/*
			 * pthread_*() functions return the error number and
			 * do not set errno, so perror() would print an
			 * unrelated message here. Report the code itself.
			 */
			fprintf(stderr, "Can't create thread: error %d\n", err);
			return -1;
		}
	}

	/* Wait for all threads to receive the signal. */
	while (__atomic_load_n(&remain, __ATOMIC_RELAXED));

	for (i = 0; i < nthreads; i++) {
		err = pthread_join(threads[i], NULL);
		if (err) {
			/* Same as above: errno is not set by pthread_join(). */
			fprintf(stderr, "Can't join thread: error %d\n", err);
			return -1;
		}
	}

	if (timer_delete(id)) {
		perror("Can't delete timer\n");
		return -1;
	}

	printf("[OK]\n");
	return 0;
}
int main(int argc, char **argv)
{
printf("Testing posix timers. False negative may happen on CPU execution \n");
@@ -217,5 +291,8 @@ int main(int argc, char **argv)
if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0)
return ksft_exit_fail();
if (check_timer_distribution() < 0)
return ksft_exit_fail();
return ksft_exit_pass();
}