From 2cc624b0a7e68ba8957b18600181f7d5b0f3e1b6 Mon Sep 17 00:00:00 2001
From: Andy Lutomirski
Date: Tue, 9 Feb 2021 18:33:41 -0800
Subject: [PATCH] x86/fault: Split the OOPS code out from no_context()

Not all callers of no_context() want to run exception fixups.
Separate the OOPS code out from the fixup code in no_context().

Signed-off-by: Andy Lutomirski
Signed-off-by: Borislav Petkov
Link: https://lkml.kernel.org/r/450f8d8eabafb83a5df349108c8e5ea83a2f939d.1612924255.git.luto@kernel.org
---
 arch/x86/mm/fault.c | 146 +++++++++++++++++++++++---------------------
 1 file changed, 77 insertions(+), 69 deletions(-)

diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index cbb1a9754473..dbf6a940b03f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -655,13 +655,84 @@ static void set_signal_archinfo(unsigned long address,
 }
 
 static noinline void
-no_context(struct pt_regs *regs, unsigned long error_code,
-	   unsigned long address, int signal, int si_code)
+page_fault_oops(struct pt_regs *regs, unsigned long error_code,
+		unsigned long address)
 {
-	struct task_struct *tsk = current;
 	unsigned long flags;
 	int sig;
 
+	if (user_mode(regs)) {
+		/*
+		 * Implicit kernel access from user mode?  Skip the stack
+		 * overflow and EFI special cases.
+		 */
+		goto oops;
+	}
+
+#ifdef CONFIG_VMAP_STACK
+	/*
+	 * Stack overflow?  During boot, we can fault near the initial
+	 * stack in the direct map, but that's not an overflow -- check
+	 * that we're in vmalloc space to avoid this.
+	 */
+	if (is_vmalloc_addr((void *)address) &&
+	    (((unsigned long)current->stack - 1 - address < PAGE_SIZE) ||
+	     address - ((unsigned long)current->stack + THREAD_SIZE) < PAGE_SIZE)) {
+		unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
+		/*
+		 * We're likely to be running with very little stack space
+		 * left.  It's plausible that we'd hit this condition but
+		 * double-fault even before we get this far, in which case
+		 * we're fine: the double-fault handler will deal with it.
+		 *
+		 * We don't want to make it all the way into the oops code
+		 * and then double-fault, though, because we're likely to
+		 * break the console driver and lose most of the stack dump.
+		 */
+		asm volatile ("movq %[stack], %%rsp\n\t"
+			      "call handle_stack_overflow\n\t"
+			      "1: jmp 1b"
+			      : ASM_CALL_CONSTRAINT
+			      : "D" ("kernel stack overflow (page fault)"),
+				"S" (regs), "d" (address),
+				[stack] "rm" (stack));
+		unreachable();
+	}
+#endif
+
+	/*
+	 * Buggy firmware could access regions which might page fault, try to
+	 * recover from such faults.
+	 */
+	if (IS_ENABLED(CONFIG_EFI))
+		efi_recover_from_page_fault(address);
+
+oops:
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have to
+	 * terminate things with extreme prejudice:
+	 */
+	flags = oops_begin();
+
+	show_fault_oops(regs, error_code, address);
+
+	if (task_stack_end_corrupted(current))
+		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
+
+	sig = SIGKILL;
+	if (__die("Oops", regs, error_code))
+		sig = 0;
+
+	/* Executive summary in case the body of the oops scrolled away */
+	printk(KERN_DEFAULT "CR2: %016lx\n", address);
+
+	oops_end(flags, regs, sig);
+}
+
+static noinline void
+no_context(struct pt_regs *regs, unsigned long error_code,
+	   unsigned long address, int signal, int si_code)
+{
 	if (user_mode(regs)) {
 		/*
 		 * This is an implicit supervisor-mode access from user
@@ -702,78 +773,15 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 		return;
 	}
 
-#ifdef CONFIG_VMAP_STACK
 	/*
-	 * Stack overflow?  During boot, we can fault near the initial
-	 * stack in the direct map, but that's not an overflow -- check
-	 * that we're in vmalloc space to avoid this.
-	 */
-	if (is_vmalloc_addr((void *)address) &&
-	    (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
-	     address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
-		unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
-		/*
-		 * We're likely to be running with very little stack space
-		 * left.  It's plausible that we'd hit this condition but
-		 * double-fault even before we get this far, in which case
-		 * we're fine: the double-fault handler will deal with it.
-		 *
-		 * We don't want to make it all the way into the oops code
-		 * and then double-fault, though, because we're likely to
-		 * break the console driver and lose most of the stack dump.
-		 */
-		asm volatile ("movq %[stack], %%rsp\n\t"
-			      "call handle_stack_overflow\n\t"
-			      "1: jmp 1b"
-			      : ASM_CALL_CONSTRAINT
-			      : "D" ("kernel stack overflow (page fault)"),
-				"S" (regs), "d" (address),
-				[stack] "rm" (stack));
-		unreachable();
-	}
-#endif
-
-	/*
-	 * 32-bit:
-	 *
-	 *   Valid to do another page fault here, because if this fault
-	 *   had been triggered by is_prefetch fixup_exception would have
-	 *   handled it.
-	 *
-	 * 64-bit:
-	 *
-	 *   Hall of shame of CPU/BIOS bugs.
+	 * AMD erratum #91 manifests as a spurious page fault on a PREFETCH
+	 * instruction.
 	 */
 	if (is_prefetch(regs, error_code, address))
 		return;
 
-	/*
-	 * Buggy firmware could access regions which might page fault, try to
-	 * recover from such faults.
-	 */
-	if (IS_ENABLED(CONFIG_EFI))
-		efi_recover_from_page_fault(address);
-
 oops:
-	/*
-	 * Oops. The kernel tried to access some bad page. We'll have to
-	 * terminate things with extreme prejudice:
-	 */
-	flags = oops_begin();
-
-	show_fault_oops(regs, error_code, address);
-
-	if (task_stack_end_corrupted(tsk))
-		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");
-
-	sig = SIGKILL;
-	if (__die("Oops", regs, error_code))
-		sig = 0;
-
-	/* Executive summary in case the body of the oops scrolled away */
-	printk(KERN_DEFAULT "CR2: %016lx\n", address);
-
-	oops_end(flags, regs, sig);
+	page_fault_oops(regs, error_code, address);
 }
 
 /*