From c9ae1b10d9561012a222e05f0d3ab4e93d301f06 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 7 Feb 2023 08:59:02 +0100 Subject: [PATCH 01/48] x86/paravirt: Merge activate_mm() and dup_mmap() callbacks The two paravirt callbacks .mmu.activate_mm() and .mmu.dup_mmap() are sharing the same implementations in all cases: for Xen PV guests they are pinning the PGD of the new mm_struct, and for all other cases they are a NOP. In the end, both callbacks are meant to register an address space with the underlying hypervisor, so there needs to be only a single callback for that purpose. So merge them to a common callback .mmu.enter_mmap() (in contrast to the corresponding already existing .mmu.exit_mmap()). As the first parameter of the old callbacks isn't used, drop it from the replacement one. [ bp: Remove last occurrence of paravirt_activate_mm() in asm/mmu_context.h ] Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Boris Ostrovsky Reviewed-by: Srivatsa S. Bhat (VMware) Link: https://lore.kernel.org/r/20230207075902.7539-1-jgross@suse.com --- arch/x86/include/asm/mmu_context.h | 11 ++--------- arch/x86/include/asm/paravirt.h | 14 +++----------- arch/x86/include/asm/paravirt_types.h | 7 ++----- arch/x86/kernel/paravirt.c | 3 +-- arch/x86/mm/init.c | 2 +- arch/x86/xen/mmu_pv.c | 12 ++---------- 6 files changed, 11 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index e01aa74a6de7..c3ad8a526378 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -16,13 +16,6 @@ extern atomic64_t last_mm_ctx_id; -#ifndef CONFIG_PARAVIRT_XXL -static inline void paravirt_activate_mm(struct mm_struct *prev, - struct mm_struct *next) -{ -} -#endif /* !CONFIG_PARAVIRT_XXL */ - #ifdef CONFIG_PERF_EVENTS DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key); DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); @@ -135,7 +128,7 @@ extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, #define activate_mm(prev, next) \ do { \ - paravirt_activate_mm((prev), (next)); \ + paravirt_enter_mmap(next); \ switch_mm((prev), (next), NULL); \ } while (0); @@ -168,7 +161,7 @@ static inline void arch_dup_pkeys(struct mm_struct *oldmm, static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { arch_dup_pkeys(oldmm, mm); - paravirt_arch_dup_mmap(oldmm, mm); + paravirt_enter_mmap(mm); return ldt_dup_context(oldmm, mm); } diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index cf40e813b3d7..b49778664d2b 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -334,16 +334,9 @@ static inline void tss_update_io_bitmap(void) } #endif -static inline void paravirt_activate_mm(struct mm_struct *prev, - struct mm_struct *next) +static inline void paravirt_enter_mmap(struct mm_struct *next) { - PVOP_VCALL2(mmu.activate_mm, prev, next); -} - -static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) -{ - PVOP_VCALL2(mmu.dup_mmap, oldmm, mm); + PVOP_VCALL1(mmu.enter_mmap, next); } static inline int paravirt_pgd_alloc(struct mm_struct *mm) @@ -789,8 +782,7 @@ extern void default_banner(void); #ifndef __ASSEMBLY__ #ifndef CONFIG_PARAVIRT_XXL -static inline void paravirt_arch_dup_mmap(struct mm_struct *oldmm, - struct mm_struct *mm) +static inline void paravirt_enter_mmap(struct mm_struct *mm) { } #endif diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 8c1da419260f..71bf64b963df 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -164,11 +164,8 @@ struct pv_mmu_ops { unsigned long (*read_cr3)(void); void (*write_cr3)(unsigned long); - /* Hooks for intercepting the creation/use of an mm_struct. */ - void (*activate_mm)(struct mm_struct *prev, - struct mm_struct *next); - void (*dup_mmap)(struct mm_struct *oldmm, - struct mm_struct *mm); + /* Hook for intercepting the creation/use of an mm_struct. */ + void (*enter_mmap)(struct mm_struct *mm); /* Hooks for allocating and freeing a pagetable top-level */ int (*pgd_alloc)(struct mm_struct *mm); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 42e182868873..0e68a31be7c1 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -363,8 +363,7 @@ struct paravirt_patch_template pv_ops = { .mmu.make_pte = PTE_IDENT, .mmu.make_pgd = PTE_IDENT, - .mmu.dup_mmap = paravirt_nop, - .mmu.activate_mm = paravirt_nop, + .mmu.enter_mmap = paravirt_nop, .mmu.lazy_mode = { .enter = paravirt_nop, diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index cb258f58fdc8..cbc53da4c1b4 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -806,7 +806,7 @@ void __init poking_init(void) BUG_ON(!poking_mm); /* Xen PV guests need the PGD to be pinned. */ - paravirt_arch_dup_mmap(NULL, poking_mm); + paravirt_enter_mmap(poking_mm); /* * Randomize the poking address, but make sure that the following page diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index ee29fb558f2e..b3b8d289b9ab 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -885,14 +885,7 @@ void xen_mm_unpin_all(void) spin_unlock(&pgd_lock); } -static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next) -{ - spin_lock(&next->page_table_lock); - xen_pgd_pin(next); - spin_unlock(&next->page_table_lock); -} - -static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) +static void xen_enter_mmap(struct mm_struct *mm) { spin_lock(&mm->page_table_lock); xen_pgd_pin(mm); @@ -2153,8 +2146,7 @@ static const typeof(pv_ops) xen_mmu_ops __initconst = { .make_p4d = PV_CALLEE_SAVE(xen_make_p4d), #endif - .activate_mm = xen_activate_mm, - .dup_mmap = xen_dup_mmap, + .enter_mmap = xen_enter_mmap, .exit_mmap = xen_exit_mmap, .lazy_mode = { From 7214b32b6f4c6c1385a52f2e3a7107f28349f505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Fri, 17 Feb 2023 03:20:05 +0000 Subject: [PATCH 02/48] x86/MCE/AMD: Make kobj_type structure constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since ee6d3dd4ed48 ("driver core: make kobj_type constant.") the driver core allows the usage of const struct kobj_type. Take advantage of this to constify the structure definition to prevent modification at runtime. Signed-off-by: Thomas Weißschuh Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230217-kobj_type-mce-amd-v1-1-40ef94816444@weissschuh.net --- arch/x86/kernel/cpu/mce/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 23c5072fbbb7..4881893e3164 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -1029,7 +1029,7 @@ static const struct sysfs_ops threshold_ops = { static void threshold_block_release(struct kobject *kobj); -static struct kobj_type threshold_ktype = { +static const struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, .default_groups = default_groups, .release = threshold_block_release, From 4e347bdf44c1fd4296a7b9657a2c0e1bd900fa50 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Mon, 6 Feb 2023 08:18:30 -0600 Subject: [PATCH 03/48] tools/x86/kcpuid: Fix avx512bw and avx512lvl fields in Fn00000007 Leaf Fn00000007 contains avx512bw at bit 26 and avx512vl at bit 28. This is incorrect per the SDM. Correct avx512bw to be bit 30 and avx512lvl to be bit 31. Fixes: c6b2f240bf8d ("tools/x86: Add a kcpuid tool to show raw CPU features") Signed-off-by: Terry Bowman Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Feng Tang Link: https://lore.kernel.org/r/20230206141832.4162264-2-terry.bowman@amd.com --- tools/arch/x86/kcpuid/cpuid.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv index 4f1c4b0c29e9..9914bdf4fc9e 100644 --- a/tools/arch/x86/kcpuid/cpuid.csv +++ b/tools/arch/x86/kcpuid/cpuid.csv @@ -184,8 +184,8 @@ 7, 0, EBX, 27, avx512er, AVX512 Exponent Reciproca instr 7, 0, EBX, 28, avx512cd, AVX512 Conflict Detection instr 7, 0, EBX, 29, sha, Intel Secure Hash Algorithm Extensions instr - 7, 0, EBX, 26, avx512bw, AVX512 Byte & Word instr - 7, 0, EBX, 28, avx512vl, AVX512 Vector Length Extentions (VL) + 7, 0, EBX, 30, avx512bw, AVX512 Byte & Word instr + 7, 0, EBX, 31, avx512vl, AVX512 Vector Length Extentions (VL) 7, 0, ECX, 0, prefetchwt1, X 7, 0, ECX, 1, avx512vbmi, AVX512 Vector Byte Manipulation Instructions 7, 0, ECX, 2, umip, User-mode Instruction Prevention From ce22e4346ff5c2f2ce6b6a43b16d336c5c6df245 Mon Sep 17 00:00:00 2001 From: Terry Bowman Date: Mon, 6 Feb 2023 08:18:31 -0600 Subject: [PATCH 04/48] tools/x86/kcpuid: Update AMD leaf Fn80000001 Add missing features to sub-leafs EAX, ECX, and EDX of 'Extended Processor Signature and Feature Bits' leaf Fn80000001. Signed-off-by: Terry Bowman Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Feng Tang Link: https://lore.kernel.org/r/20230206141832.4162264-3-terry.bowman@amd.com --- tools/arch/x86/kcpuid/cpuid.csv | 61 ++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 5 deletions(-) diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv index 9914bdf4fc9e..e0c25b75327e 100644 --- a/tools/arch/x86/kcpuid/cpuid.csv +++ b/tools/arch/x86/kcpuid/cpuid.csv @@ -340,19 +340,70 @@ # According to SDM # 40000000H - 4FFFFFFFH is invalid range - # Leaf 80000001H # Extended Processor Signature and Feature Bits -0x80000001, 0, ECX, 0, lahf_lm, LAHF/SAHF available in 64-bit mode -0x80000001, 0, ECX, 5, lzcnt, LZCNT -0x80000001, 0, ECX, 8, prefetchw, PREFETCHW +0x80000001, 0, EAX, 27:20, extfamily, Extended family +0x80000001, 0, EAX, 19:16, extmodel, Extended model +0x80000001, 0, EAX, 11:8, basefamily, Description of Family +0x80000001, 0, EAX, 11:8, basemodel, Model numbers vary with product +0x80000001, 0, EAX, 3:0, stepping, Processor stepping (revision) for a specific model +0x80000001, 0, EBX, 31:28, pkgtype, Specifies the package type + +0x80000001, 0, ECX, 0, lahf_lm, LAHF/SAHF available in 64-bit mode +0x80000001, 0, ECX, 1, cmplegacy, Core multi-processing legacy mode +0x80000001, 0, ECX, 2, svm, Indicates support for: VMRUN, VMLOAD, VMSAVE, CLGI, VMMCALL, and INVLPGA +0x80000001, 0, ECX, 3, extapicspace, Extended APIC register space +0x80000001, 0, ECX, 4, altmovecr8, Indicates support for LOCK MOV CR0 means MOV CR8 +0x80000001, 0, ECX, 5, lzcnt, LZCNT +0x80000001, 0, ECX, 6, sse4a, EXTRQ, INSERTQ, MOVNTSS, and MOVNTSD instruction support +0x80000001, 0, ECX, 7, misalignsse, Misaligned SSE Mode +0x80000001, 0, ECX, 8, prefetchw, PREFETCHW +0x80000001, 0, ECX, 9, osvw, OS Visible Work-around support +0x80000001, 0, ECX, 10, ibs, Instruction Based Sampling +0x80000001, 0, ECX, 11, xop, Extended operation support +0x80000001, 0, ECX, 12, skinit, SKINIT and STGI support +0x80000001, 0, ECX, 13, wdt, Watchdog timer support +0x80000001, 0, ECX, 15, lwp, Lightweight profiling support +0x80000001, 0, ECX, 16, fma4, Four-operand FMA instruction support +0x80000001, 0, ECX, 17, tce, Translation cache extension +0x80000001, 0, ECX, 22, TopologyExtensions, Indicates support for Core::X86::Cpuid::CachePropEax0 and Core::X86::Cpuid::ExtApicId +0x80000001, 0, ECX, 23, perfctrextcore, Indicates support for Core::X86::Msr::PERF_CTL0 - 5 and Core::X86::Msr::PERF_CTR +0x80000001, 0, ECX, 24, perfctrextdf, Indicates support for Core::X86::Msr::DF_PERF_CTL and Core::X86::Msr::DF_PERF_CTR +0x80000001, 0, ECX, 26, databreakpointextension, Indicates data breakpoint support for Core::X86::Msr::DR0_ADDR_MASK, Core::X86::Msr::DR1_ADDR_MASK, Core::X86::Msr::DR2_ADDR_MASK and Core::X86::Msr::DR3_ADDR_MASK +0x80000001, 0, ECX, 27, perftsc, Performance time-stamp counter supported +0x80000001, 0, ECX, 28, perfctrextllc, Indicates support for L3 performance counter extensions +0x80000001, 0, ECX, 29, mwaitextended, MWAITX and MONITORX capability is supported +0x80000001, 0, ECX, 30, admskextn, Indicates support for address mask extension (to 32 bits and to all 4 DRs) for instruction breakpoints + +0x80000001, 0, EDX, 0, fpu, x87 floating point unit on-chip +0x80000001, 0, EDX, 1, vme, Virtual-mode enhancements +0x80000001, 0, EDX, 2, de, Debugging extensions, IO breakpoints, CR4.DE +0x80000001, 0, EDX, 3, pse, Page-size extensions (4 MB pages) +0x80000001, 0, EDX, 4, tsc, Time stamp counter, RDTSC/RDTSCP instructions, CR4.TSD +0x80000001, 0, EDX, 5, msr, Model-specific registers (MSRs), with RDMSR and WRMSR instructions +0x80000001, 0, EDX, 6, pae, Physical-address extensions (PAE) +0x80000001, 0, EDX, 7, mce, Machine Check Exception, CR4.MCE +0x80000001, 0, EDX, 8, cmpxchg8b, CMPXCHG8B instruction +0x80000001, 0, EDX, 9, apic, advanced programmable interrupt controller (APIC) exists and is enabled 0x80000001, 0, EDX, 11, sysret, SYSCALL/SYSRET supported +0x80000001, 0, EDX, 12, mtrr, Memory-type range registers +0x80000001, 0, EDX, 13, pge, Page global extension, CR4.PGE +0x80000001, 0, EDX, 14, mca, Machine check architecture, MCG_CAP +0x80000001, 0, EDX, 15, cmov, Conditional move instructions, CMOV, FCOMI, FCMOV +0x80000001, 0, EDX, 16, pat, Page attribute table +0x80000001, 0, EDX, 17, pse36, Page-size extensions 0x80000001, 0, EDX, 20, exec_dis, Execute Disable Bit available +0x80000001, 0, EDX, 22, mmxext, AMD extensions to MMX instructions +0x80000001, 0, EDX, 23, mmx, MMX instructions +0x80000001, 0, EDX, 24, fxsr, FXSAVE and FXRSTOR instructions +0x80000001, 0, EDX, 25, ffxsr, FXSAVE and FXRSTOR instruction optimizations 0x80000001, 0, EDX, 26, 1gb_page, 1GB page supported 0x80000001, 0, EDX, 27, rdtscp, RDTSCP and IA32_TSC_AUX are available -#0x80000001, 0, EDX, 29, 64b, 64b Architecture supported +0x80000001, 0, EDX, 29, lm, 64b Architecture supported +0x80000001, 0, EDX, 30, threednowext, AMD extensions to 3DNow! instructions +0x80000001, 0, EDX, 31, threednow, 3DNow! instructions # Leaf 80000002H/80000003H/80000004H # Processor Brand String From cd3ad6619517cda3a055d864a85cebbd434dba9a Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Mon, 6 Feb 2023 08:18:32 -0600 Subject: [PATCH 05/48] tools/x86/kcpuid: Dump the CPUID function in detailed view Sometimes it is useful to know which CPUID leaf contains the fields so add it to -d output so that it looks like this: CPUID_0x8000001e_ECX[0x0]: extended_apic_id : 0x8 - Extended APIC ID core_id : 0x4 - Identifies the logical core ID threads_per_core : 0x1 - The number of threads per core is threads_per_core + 1 node_id : 0x0 - Node ID nodes_per_processor : 0x0 - Nodes per processor { 0: 1 node, else reserved } CPUID_0x8000001f_ECX[0x0]: sme - Secure Memory Encryption ... Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Terry Bowman Reviewed-by: Feng Tang Link: https://lore.kernel.org/r/20230206141832.4162264-4-terry.bowman@amd.com --- tools/arch/x86/kcpuid/kcpuid.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index dae75511fef7..416f5b35dd8f 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -33,7 +33,7 @@ struct reg_desc { struct bits_desc descs[32]; }; -enum { +enum cpuid_reg { R_EAX = 0, R_EBX, R_ECX, @@ -41,6 +41,10 @@ enum { NR_REGS }; +static const char * const reg_names[] = { + "EAX", "EBX", "ECX", "EDX", +}; + struct subleaf { u32 index; u32 sub; @@ -428,12 +432,18 @@ static void parse_text(void) /* Decode every eax/ebx/ecx/edx */ -static void decode_bits(u32 value, struct reg_desc *rdesc) +static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg) { struct bits_desc *bdesc; int start, end, i; u32 mask; + if (!rdesc->nr) { + if (show_details) + printf("\t %s: 0x%08x\n", reg_names[reg], value); + return; + } + for (i = 0; i < rdesc->nr; i++) { bdesc = &rdesc->descs[i]; @@ -468,13 +478,21 @@ static void show_leaf(struct subleaf *leaf) if (!leaf) return; - if (show_raw) + if (show_raw) { leaf_print_raw(leaf); + } else { + if (show_details) + printf("CPUID_0x%x_ECX[0x%x]:\n", + leaf->index, leaf->sub); + } - decode_bits(leaf->eax, &leaf->info[R_EAX]); - decode_bits(leaf->ebx, &leaf->info[R_EBX]); - decode_bits(leaf->ecx, &leaf->info[R_ECX]); - decode_bits(leaf->edx, &leaf->info[R_EDX]); + decode_bits(leaf->eax, &leaf->info[R_EAX], R_EAX); + decode_bits(leaf->ebx, &leaf->info[R_EBX], R_EBX); + decode_bits(leaf->ecx, &leaf->info[R_ECX], R_ECX); + decode_bits(leaf->edx, &leaf->info[R_EDX], R_EDX); + + if (!show_raw && show_details) + printf("\n"); } static void show_func(struct cpuid_func *func) From 554eec0b4a29d0dfe59065e2cf1d2568c7b4d5f0 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 22 Feb 2023 20:10:54 +0100 Subject: [PATCH 06/48] x86/mce: Always inline old MCA stubs The stubs for the ancient MCA support (CONFIG_X86_ANCIENT_MCE) are normally optimized away on 64-bit builds. However, an allmodconfig one causes the compiler to add sanitizer calls gunk into them and they exist as constprop calls. Which objtool then complains about: vmlinux.o: warning: objtool: do_machine_check+0xad8: call to \ pentium_machine_check.constprop.0() leaves .noinstr.text section due to them missing noinstr. One could tag them "noinstr" but what should really happen is, they should be forcefully inlined so that all that gunk gets optimized away and the warning doesn't even have a chance to fire. Do so. No functional changes. Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230222191054.4701-1-bp@alien8.de --- arch/x86/kernel/cpu/mce/internal.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index 91a415553c27..d2412ce2d312 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -244,11 +244,11 @@ noinstr void pentium_machine_check(struct pt_regs *regs); noinstr void winchip_machine_check(struct pt_regs *regs); static inline void enable_p5_mce(void) { mce_p5_enabled = 1; } #else -static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} -static inline void enable_p5_mce(void) {} -static inline void pentium_machine_check(struct pt_regs *regs) {} -static inline void winchip_machine_check(struct pt_regs *regs) {} +static __always_inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} +static __always_inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} +static __always_inline void enable_p5_mce(void) {} +static __always_inline void pentium_machine_check(struct pt_regs *regs) {} +static __always_inline void winchip_machine_check(struct pt_regs *regs) {} #endif noinstr u64 mce_rdmsrl(u32 msr); From 5910f06503aae3cc4890e562683abc3e38857ff9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 24 Feb 2023 13:35:22 -0800 Subject: [PATCH 07/48] MAINTAINERS: Add x86 hardware vulnerabilities section Add the bunch of losers who have to deal with this to MAINTAINERS so that they can get explicitly CCed on more hw nightmares. [ bp: Add commit message. ] Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230224213522.nofavod2jzhn22wp@treble --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 8d5bc223f305..d95c6ccbd116 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -22660,6 +22660,17 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/asm F: arch/x86/entry/ +X86 HARDWARE VULNERABILITIES +M: Thomas Gleixner +M: Borislav Petkov +M: Peter Zijlstra +M: Josh Poimboeuf +R: Pawan Gupta +S: Maintained +F: Documentation/admin-guide/hw-vuln/ +F: arch/x86/include/asm/nospec-branch.h +F: arch/x86/kernel/cpu/bugs.c + X86 MCE INFRASTRUCTURE M: Tony Luck M: Borislav Petkov From 71b1e3ba3fed5a34c5fac6d3a15c2634b04c1eb7 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Sat, 11 Feb 2023 09:17:28 +0800 Subject: [PATCH 08/48] EDAC/skx: Fix overflows on the DRAM row address mapping arrays The current DRAM row address mapping arrays skx_{open,close}_row[] only support ranks with sizes up to 16G. Decoding a rank address to a DRAM row address for a 32G rank by using either one of the above arrays by the skx_edac driver, will result in an overflow on the array. For a 32G rank, the most significant DRAM row address bit (the bit17) is mapped from the bit34 of the rank address. Add this new mapping item to both arrays to fix the overflow issue. Fixes: 4ec656bdf43a ("EDAC, skx_edac: Add EDAC driver for Skylake") Reported-by: Feng Xu Tested-by: Feng Xu Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/all/20230211011728.71764-1-qiuxu.zhuo@intel.com --- drivers/edac/skx_base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c index 9397abb42c49..0a862336a7ce 100644 --- a/drivers/edac/skx_base.c +++ b/drivers/edac/skx_base.c @@ -510,7 +510,7 @@ rir_found: } static u8 skx_close_row[] = { - 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33 + 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33, 34 }; static u8 skx_close_column[] = { @@ -518,7 +518,7 @@ static u8 skx_close_column[] = { }; static u8 skx_open_row[] = { - 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33 + 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34 }; static u8 skx_open_column[] = { From 8cc68c9c9e92dbaae51a711454c66eb668045508 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Sat, 25 Feb 2023 01:11:31 +0100 Subject: [PATCH 09/48] x86/CPU/AMD: Make sure EFER[AIBRSE] is set The AutoIBRS bit gets set only on the BSP as part of determining which mitigation to enable on AMD. Setting on the APs relies on the circumstance that the APs get booted through the trampoline and EFER - the MSR which contains that bit - gets replicated on every AP from the BSP. However, this can change in the future and considering the security implications of this bit not being set on every CPU, make sure it is set by verifying EFER later in the boot process and on every AP. Reported-by: Josh Poimboeuf Signed-off-by: Borislav Petkov (AMD) Acked-by: Dave Hansen Link: https://lore.kernel.org/r/20230224185257.o3mcmloei5zqu7wa@treble --- arch/x86/kernel/cpu/amd.c | 11 +++++++++++ arch/x86/kernel/cpu/bugs.c | 10 +--------- arch/x86/kernel/cpu/cpu.h | 8 ++++++++ 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 380753b14cab..dd32dbc7c33e 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -996,6 +996,17 @@ static void init_amd(struct cpuinfo_x86 *c) msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); check_null_seg_clears_base(c); + + /* + * Make sure EFER[AIBRSE - Automatic IBRS Enable] is set. The APs are brought up + * using the trampoline code and as part of it, MSR_EFER gets prepared there in + * order to be replicated onto them. Regardless, set it here again, if not set, + * to protect against any future refactoring/code reorganization which might + * miss setting this important bit. + */ + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) && + cpu_has(c, X86_FEATURE_AUTOIBRS)) + WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index f9d060e71c3e..182af64387d0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -784,8 +784,7 @@ static int __init nospectre_v1_cmdline(char *str) } early_param("nospectre_v1", nospectre_v1_cmdline); -static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - SPECTRE_V2_NONE; +enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; #undef pr_fmt #define pr_fmt(fmt) "RETBleed: " fmt @@ -1133,13 +1132,6 @@ spectre_v2_parse_user_cmdline(void) return SPECTRE_V2_USER_CMD_AUTO; } -static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) -{ - return mode == SPECTRE_V2_EIBRS || - mode == SPECTRE_V2_EIBRS_RETPOLINE || - mode == SPECTRE_V2_EIBRS_LFENCE; -} - static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode) { return spectre_v2_in_eibrs_mode(mode) || mode == SPECTRE_V2_IBRS; diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 57a5349e6954..f97b0fe13da8 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -83,4 +83,12 @@ unsigned int aperfmperf_get_khz(int cpu); extern void x86_spec_ctrl_setup_ap(void); extern void update_srbds_msr(void); +extern enum spectre_v2_mitigation spectre_v2_enabled; + +static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +{ + return mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE; +} #endif /* ARCH_X86_CPU_H */ From d7ce15e1d4162ab5e56dead10d4ae69a6b5c8ee8 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Wed, 1 Mar 2023 17:19:46 -0800 Subject: [PATCH 10/48] x86/split_lock: Enumerate architectural split lock disable bit The December 2022 edition of the Intel Instruction Set Extensions manual defined that the split lock disable bit in the IA32_CORE_CAPABILITIES MSR is (and retrospectively always has been) architectural. Remove all the model specific checks except for Ice Lake variants which are still needed because these CPU models do not enumerate presence of the IA32_CORE_CAPABILITIES MSR. Originally-by: Dave Hansen Signed-off-by: Fenghua Yu Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tony Luck Link: https://lore.kernel.org/lkml/20220701131958.687066-1-fenghua.yu@intel.com/t/#mada243bee0915532a6adef6a9e32d244d1a9aef4 --- arch/x86/kernel/cpu/intel.c | 59 ++++++++++++++----------------------- 1 file changed, 22 insertions(+), 37 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 291d4167fab8..1c648b09e053 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1451,31 +1451,13 @@ void handle_bus_lock(struct pt_regs *regs) } /* - * Bits in the IA32_CORE_CAPABILITIES are not architectural, so they should - * only be trusted if it is confirmed that a CPU model implements a - * specific feature at a particular bit position. - * - * The possible driver data field values: - * - * - 0: CPU models that are known to have the per-core split-lock detection - * feature even though they do not enumerate IA32_CORE_CAPABILITIES. - * - * - 1: CPU models which may enumerate IA32_CORE_CAPABILITIES and if so use - * bit 5 to enumerate the per-core split-lock detection feature. + * CPU models that are known to have the per-core split-lock detection + * feature even though they do not enumerate IA32_CORE_CAPABILITIES. */ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), - X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, 1), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, 1), - X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1), - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, 1), - X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1), - X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 1), - X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, 1), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0), {} }; @@ -1487,24 +1469,27 @@ static void __init split_lock_setup(struct cpuinfo_x86 *c) if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return; + /* Check for CPUs that have support but do not enumerate it: */ m = x86_match_cpu(split_lock_cpu_ids); - if (!m) + if (m) + goto supported; + + if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) return; - switch (m->driver_data) { - case 0: - break; - case 1: - if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) - return; - rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps); - if (!(ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT)) - return; - break; - default: - return; - } + /* + * Not all bits in MSR_IA32_CORE_CAPS are architectural, but + * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set + * it have split lock detection. + */ + rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps); + if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT) + goto supported; + /* CPU is not in the model list and does not have the MSR bit: */ + return; + +supported: cpu_model_supports_sld = true; __split_lock_setup(); } From 11af36cb898123fd4e0034f1bc6550aedcc87800 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 17 Mar 2023 07:33:25 +0100 Subject: [PATCH 11/48] x86/paravirt: Convert simple paravirt functions to asm All functions referenced via __PV_IS_CALLEE_SAVE() need to be assembler functions, as those functions calls are hidden from the compiler. In case the kernel is compiled with "-fzero-call-used-regs" the compiler will clobber caller-saved registers at the end of C functions, which will result in unexpectedly zeroed registers at the call site of the related paravirt functions. Replace the C functions with DEFINE_PARAVIRT_ASM() constructs using the same instructions as the related paravirt calls in the PVOP_ALT_[V]CALLEE*() macros. And since they're not C functions visible to the compiler anymore, latter won't do the callee-clobbered zeroing invoked by -fzero-call-used-regs and thus won't corrupt registers. [ bp: Extend commit message. ] Signed-off-by: Juergen Gross Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230317063325.361-1-jgross@suse.com --- arch/x86/include/asm/paravirt_types.h | 8 +++++++- arch/x86/kernel/paravirt.c | 27 ++++++--------------------- 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 71bf64b963df..4acbcddddc29 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -559,8 +559,14 @@ void paravirt_flush_lazy_mmu(void); void _paravirt_nop(void); void paravirt_BUG(void); -u64 _paravirt_ident_64(u64); unsigned long paravirt_ret0(void); +#ifdef CONFIG_PARAVIRT_XXL +u64 _paravirt_ident_64(u64); +unsigned long pv_native_save_fl(void); +void pv_native_irq_disable(void); +void pv_native_irq_enable(void); +unsigned long pv_native_read_cr2(void); +#endif #define paravirt_nop ((void *)_paravirt_nop) diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 0e68a31be7c1..ac10b46c5832 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -64,11 +64,11 @@ static unsigned paravirt_patch_call(void *insn_buff, const void *target, } #ifdef CONFIG_PARAVIRT_XXL -/* identity function, which can be inlined */ -u64 notrace _paravirt_ident_64(u64 x) -{ - return x; -} +DEFINE_PARAVIRT_ASM(_paravirt_ident_64, "mov %rdi, %rax", .text); +DEFINE_PARAVIRT_ASM(pv_native_save_fl, "pushf; pop %rax", .noinstr.text); +DEFINE_PARAVIRT_ASM(pv_native_irq_disable, "cli", .noinstr.text); +DEFINE_PARAVIRT_ASM(pv_native_irq_enable, "sti", .noinstr.text); +DEFINE_PARAVIRT_ASM(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text); #endif DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); @@ -197,11 +197,6 @@ void paravirt_end_context_switch(struct task_struct *next) arch_enter_lazy_mmu_mode(); } -static noinstr unsigned long pv_native_read_cr2(void) -{ - return native_read_cr2(); -} - static noinstr void pv_native_write_cr2(unsigned long val) { native_write_cr2(val); @@ -222,16 +217,6 @@ noinstr void pv_native_wbinvd(void) native_wbinvd(); } -static noinstr void pv_native_irq_enable(void) -{ - native_irq_enable(); -} - -static noinstr void pv_native_irq_disable(void) -{ - native_irq_disable(); -} - static noinstr void pv_native_safe_halt(void) { native_safe_halt(); @@ -298,7 +283,7 @@ struct paravirt_patch_template pv_ops = { .cpu.end_context_switch = paravirt_nop, /* Irq ops. */ - .irq.save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), + .irq.save_fl = __PV_IS_CALLEE_SAVE(pv_native_save_fl), .irq.irq_disable = __PV_IS_CALLEE_SAVE(pv_native_irq_disable), .irq.irq_enable = __PV_IS_CALLEE_SAVE(pv_native_irq_enable), .irq.safe_halt = pv_native_safe_halt, From 4c1cdec319b9aadb65737c3eb1f5cb74bd6aa156 Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Fri, 27 Jan 2023 15:16:01 +0000 Subject: [PATCH 12/48] x86/MCE/AMD: Use an u64 for bank_map Thee maximum number of MCA banks is 64 (MAX_NR_BANKS), see a0bc32b3cacf ("x86/mce: Increase maximum number of banks to 64"). However, the bank_map which contains a bitfield of which banks to initialize is of type unsigned int and that overflows when those bit numbers are >= 32, leading to UBSAN complaining correctly: UBSAN: shift-out-of-bounds in arch/x86/kernel/cpu/mce/amd.c:1365:38 shift exponent 32 is too large for 32-bit type 'int' Change the bank_map to a u64 and use the proper BIT_ULL() macro when modifying bits in there. [ bp: Rewrite commit message. ] Fixes: a0bc32b3cacf ("x86/mce: Increase maximum number of banks to 64") Signed-off-by: Muralidhara M K Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127151601.1068324-1-muralimk@amd.com --- arch/x86/kernel/cpu/mce/amd.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 4881893e3164..0b971f974096 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -235,10 +235,10 @@ static DEFINE_PER_CPU(struct threshold_bank **, threshold_banks); * A list of the banks enabled on each logical CPU. Controls which respective * descriptors to initialize later in mce_threshold_create_device(). */ -static DEFINE_PER_CPU(unsigned int, bank_map); +static DEFINE_PER_CPU(u64, bank_map); /* Map of banks that have more than MCA_MISC0 available. */ -static DEFINE_PER_CPU(u32, smca_misc_banks_map); +static DEFINE_PER_CPU(u64, smca_misc_banks_map); static void amd_threshold_interrupt(void); static void amd_deferred_error_interrupt(void); @@ -267,7 +267,7 @@ static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu) return; if (low & MASK_BLKPTR_LO) - per_cpu(smca_misc_banks_map, cpu) |= BIT(bank); + per_cpu(smca_misc_banks_map, cpu) |= BIT_ULL(bank); } @@ -530,7 +530,7 @@ static u32 smca_get_block_address(unsigned int bank, unsigned int block, if (!block) return MSR_AMD64_SMCA_MCx_MISC(bank); - if (!(per_cpu(smca_misc_banks_map, cpu) & BIT(bank))) + if (!(per_cpu(smca_misc_banks_map, cpu) & BIT_ULL(bank))) return 0; return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); @@ -574,7 +574,7 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, int new; if (!block) - per_cpu(bank_map, cpu) |= (1 << bank); + per_cpu(bank_map, cpu) |= BIT_ULL(bank); memset(&b, 0, sizeof(b)); b.cpu = cpu; @@ -878,7 +878,7 @@ static void amd_threshold_interrupt(void) return; for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) { - if (!(per_cpu(bank_map, cpu) & (1 << bank))) + if (!(per_cpu(bank_map, cpu) & BIT_ULL(bank))) continue; first_block = bp[bank]->blocks; @@ -1356,7 +1356,7 @@ int mce_threshold_create_device(unsigned int cpu) return -ENOMEM; for (bank = 0; bank < numbanks; ++bank) { - if (!(this_cpu_read(bank_map) & (1 << bank))) + if (!(this_cpu_read(bank_map) & BIT_ULL(bank))) continue; err = threshold_create_bank(bp, cpu, bank); if (err) { From efb339a83368ab25de1a18c0fdff85e01c13a1ea Mon Sep 17 00:00:00 2001 From: Peter Gonda Date: Tue, 7 Mar 2023 20:24:39 +0100 Subject: [PATCH 13/48] crypto: ccp - Name -1 return value as SEV_RET_NO_FW_CALL The PSP can return a "firmware error" code of -1 in circumstances where the PSP has not actually been called. To make this protocol unambiguous, name the value SEV_RET_NO_FW_CALL. [ bp: Massage a bit. ] Signed-off-by: Peter Gonda Signed-off-by: Dionna Glaze Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20221207010210.2563293-2-dionnaglaze@google.com --- Documentation/virt/coco/sev-guest.rst | 4 ++-- drivers/crypto/ccp/sev-dev.c | 8 +++++--- include/uapi/linux/psp-sev.h | 7 +++++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Documentation/virt/coco/sev-guest.rst b/Documentation/virt/coco/sev-guest.rst index bf593e88cfd9..aa3e4c6a1f90 100644 --- a/Documentation/virt/coco/sev-guest.rst +++ b/Documentation/virt/coco/sev-guest.rst @@ -40,8 +40,8 @@ along with a description: The guest ioctl should be issued on a file descriptor of the /dev/sev-guest device. The ioctl accepts struct snp_user_guest_request. The input and output structure is specified through the req_data and resp_data field respectively. If the ioctl fails -to execute due to a firmware error, then fw_err code will be set otherwise the -fw_err will be set to 0x00000000000000ff. +to execute due to a firmware error, then fw_err code will be set. Otherwise, fw_err +will be set to 0x00000000ffffffff, i.e., the lower 32-bits are -1. The firmware checks that the message sequence counter is one greater than the guests message sequence counter. If guest driver fails to increment message diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index e2f25926eb51..823c67a43c38 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -444,10 +444,10 @@ static int __sev_init_ex_locked(int *error) static int __sev_platform_init_locked(int *error) { + int rc = 0, psp_ret = SEV_RET_NO_FW_CALL; struct psp_device *psp = psp_master; - struct sev_device *sev; - int rc = 0, psp_ret = -1; int (*init_function)(int *error); + struct sev_device *sev; if (!psp || !psp->sev_data) return -ENODEV; @@ -475,9 +475,11 @@ static int __sev_platform_init_locked(int *error) * initialization function should succeed by replacing the state * with a reset state. */ - dev_err(sev->dev, "SEV: retrying INIT command because of SECURE_DATA_INVALID error. Retrying once to reset PSP SEV state."); + dev_err(sev->dev, +"SEV: retrying INIT command because of SECURE_DATA_INVALID error. Retrying once to reset PSP SEV state."); rc = init_function(&psp_ret); } + if (error) *error = psp_ret; diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index 91b4c63d5cbf..1c9da485318f 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -36,6 +36,13 @@ enum { * SEV Firmware status code */ typedef enum { + /* + * This error code is not in the SEV spec. Its purpose is to convey that + * there was an error that prevented the SEV firmware from being called. + * The SEV API error codes are 16 bits, so the -1 value will not overlap + * with possible values from the specification. + */ + SEV_RET_NO_FW_CALL = -1, SEV_RET_SUCCESS = 0, SEV_RET_INVALID_PLATFORM_STATE, SEV_RET_INVALID_GUEST_STATE, From dbf07b544ca12c0ab8bd3fc1ea3509ea713a8bf5 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Tue, 7 Mar 2023 20:24:46 +0100 Subject: [PATCH 14/48] crypto: ccp: Get rid of __sev_platform_init_locked()'s local function pointer Add a wrapper instead. No functional changes. Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230307192449.24732-9-bp@alien8.de --- drivers/crypto/ccp/sev-dev.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 823c67a43c38..e346c00b132a 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -442,11 +442,18 @@ static int __sev_init_ex_locked(int *error) return __sev_do_cmd_locked(SEV_CMD_INIT_EX, &data, error); } +static inline int __sev_do_init_locked(int *psp_ret) +{ + if (sev_init_ex_buffer) + return __sev_init_ex_locked(psp_ret); + else + return __sev_init_locked(psp_ret); +} + static int __sev_platform_init_locked(int *error) { int rc = 0, psp_ret = SEV_RET_NO_FW_CALL; struct psp_device *psp = psp_master; - int (*init_function)(int *error); struct sev_device *sev; if (!psp || !psp->sev_data) @@ -458,15 +465,12 @@ static int __sev_platform_init_locked(int *error) return 0; if (sev_init_ex_buffer) { - init_function = __sev_init_ex_locked; rc = sev_read_init_ex_file(); if (rc) return rc; - } else { - init_function = __sev_init_locked; } - rc = init_function(&psp_ret); + rc = __sev_do_init_locked(&psp_ret); if (rc && psp_ret == SEV_RET_SECURE_DATA_INVALID) { /* * Initialization command returned an integrity check failure @@ -477,7 +481,7 @@ static int __sev_platform_init_locked(int *error) */ dev_err(sev->dev, "SEV: retrying INIT command because of SECURE_DATA_INVALID error. Retrying once to reset PSP SEV state."); - rc = init_function(&psp_ret); + rc = __sev_do_init_locked(&psp_ret); } if (error) From 965006103a14703cc42043bbf9b5e0cdf7a468ad Mon Sep 17 00:00:00 2001 From: Dionna Glaze Date: Tue, 7 Mar 2023 20:24:48 +0100 Subject: [PATCH 15/48] virt/coco/sev-guest: Double-buffer messages The encryption algorithms read and write directly to shared unencrypted memory, which may leak information as well as permit the host to tamper with the message integrity. Instead, copy whole messages in or out as needed before doing any computation on them. Fixes: d5af44dde546 ("x86/sev: Provide support for SNP guest request NAEs") Signed-off-by: Dionna Glaze Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230214164638.1189804-3-dionnaglaze@google.com --- drivers/virt/coco/sev-guest/sev-guest.c | 27 +++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 46f1a8d558b0..0c7b47acba2a 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -46,7 +46,15 @@ struct snp_guest_dev { void *certs_data; struct snp_guest_crypto *crypto; + /* request and response are in unencrypted memory */ struct snp_guest_msg *request, *response; + + /* + * Avoid information leakage by double-buffering shared messages + * in fields that are in regular encrypted memory. + */ + struct snp_guest_msg secret_request, secret_response; + struct snp_secrets_page_layout *layout; struct snp_req_data input; u32 *os_area_msg_seqno; @@ -266,14 +274,17 @@ static int dec_payload(struct snp_guest_dev *snp_dev, struct snp_guest_msg *msg, static int verify_and_dec_payload(struct snp_guest_dev *snp_dev, void *payload, u32 sz) { struct snp_guest_crypto *crypto = snp_dev->crypto; - struct snp_guest_msg *resp = snp_dev->response; - struct snp_guest_msg *req = snp_dev->request; + struct snp_guest_msg *resp = &snp_dev->secret_response; + struct snp_guest_msg *req = &snp_dev->secret_request; struct snp_guest_msg_hdr *req_hdr = &req->hdr; struct snp_guest_msg_hdr *resp_hdr = &resp->hdr; dev_dbg(snp_dev->dev, "response [seqno %lld type %d version %d sz %d]\n", resp_hdr->msg_seqno, resp_hdr->msg_type, resp_hdr->msg_version, resp_hdr->msg_sz); + /* Copy response from shared memory to encrypted memory. */ + memcpy(resp, snp_dev->response, sizeof(*resp)); + /* Verify that the sequence counter is incremented by 1 */ if (unlikely(resp_hdr->msg_seqno != (req_hdr->msg_seqno + 1))) return -EBADMSG; @@ -297,7 +308,7 @@ static int verify_and_dec_payload(struct snp_guest_dev *snp_dev, void *payload, static int enc_payload(struct snp_guest_dev *snp_dev, u64 seqno, int version, u8 type, void *payload, size_t sz) { - struct snp_guest_msg *req = snp_dev->request; + struct snp_guest_msg *req = &snp_dev->secret_request; struct snp_guest_msg_hdr *hdr = &req->hdr; memset(req, 0, sizeof(*req)); @@ -417,13 +428,21 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in if (!seqno) return -EIO; + /* Clear shared memory's response for the host to populate. */ memset(snp_dev->response, 0, sizeof(struct snp_guest_msg)); - /* Encrypt the userspace provided payload */ + /* Encrypt the userspace provided payload in snp_dev->secret_request. */ rc = enc_payload(snp_dev, seqno, msg_ver, type, req_buf, req_sz); if (rc) return rc; + /* + * Write the fully encrypted request to the shared unencrypted + * request page. + */ + memcpy(snp_dev->request, &snp_dev->secret_request, + sizeof(snp_dev->secret_request)); + rc = __handle_guest_request(snp_dev, exit_code, fw_err); if (rc) { if (rc == -EIO && *fw_err == SNP_GUEST_REQ_INVALID_LEN) From 0144e3b85d7b42e8a4cda991c0e81f131897457a Mon Sep 17 00:00:00 2001 From: Dionna Glaze Date: Tue, 7 Mar 2023 20:24:49 +0100 Subject: [PATCH 16/48] x86/sev: Change snp_guest_issue_request()'s fw_err argument The GHCB specification declares that the firmware error value for a guest request will be stored in the lower 32 bits of EXIT_INFO_2. The upper 32 bits are for the VMM's own error code. The fw_err argument to snp_guest_issue_request() is thus a misnomer, and callers will need access to all 64 bits. The type of unsigned long also causes problems, since sw_exit_info2 is u64 (unsigned long long) vs the argument's unsigned long*. Change this type for issuing the guest request. Pass the ioctl command struct's error field directly instead of in a local variable, since an incomplete guest request may not set the error code, and uninitialized stack memory would be written back to user space. The firmware might not even be called, so bookend the call with the no firmware call error and clear the error. Since the "fw_err" field is really exitinfo2 split into the upper bits' vmm error code and lower bits' firmware error code, convert the 64 bit value to a union. [ bp: - Massage commit message - adjust code - Fix a build issue as Reported-by: kernel test robot Link: https://lore.kernel.org/oe-kbuild-all/202303070609.vX6wp2Af-lkp@intel.com - print exitinfo2 in hex Tom: - Correct -EIO exit case. ] Signed-off-by: Dionna Glaze Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230214164638.1189804-5-dionnaglaze@google.com Link: https://lore.kernel.org/r/20230307192449.24732-12-bp@alien8.de --- Documentation/virt/coco/sev-guest.rst | 20 ++++--- arch/x86/include/asm/sev-common.h | 4 -- arch/x86/include/asm/sev.h | 10 ++-- arch/x86/kernel/sev.c | 15 +++--- drivers/virt/coco/sev-guest/sev-guest.c | 72 +++++++++++++------------ include/uapi/linux/sev-guest.h | 18 ++++++- 6 files changed, 83 insertions(+), 56 deletions(-) diff --git a/Documentation/virt/coco/sev-guest.rst b/Documentation/virt/coco/sev-guest.rst index aa3e4c6a1f90..68b0d2363af8 100644 --- a/Documentation/virt/coco/sev-guest.rst +++ b/Documentation/virt/coco/sev-guest.rst @@ -37,11 +37,11 @@ along with a description: the return value. General error numbers (-ENOMEM, -EINVAL) are not detailed, but errors with specific meanings are. -The guest ioctl should be issued on a file descriptor of the /dev/sev-guest device. -The ioctl accepts struct snp_user_guest_request. The input and output structure is -specified through the req_data and resp_data field respectively. If the ioctl fails -to execute due to a firmware error, then fw_err code will be set. Otherwise, fw_err -will be set to 0x00000000ffffffff, i.e., the lower 32-bits are -1. +The guest ioctl should be issued on a file descriptor of the /dev/sev-guest +device. The ioctl accepts struct snp_user_guest_request. The input and +output structure is specified through the req_data and resp_data field +respectively. If the ioctl fails to execute due to a firmware error, then +the fw_error code will be set, otherwise fw_error will be set to -1. The firmware checks that the message sequence counter is one greater than the guests message sequence counter. If guest driver fails to increment message @@ -57,8 +57,14 @@ counter (e.g. counter overflow), then -EIO will be returned. __u64 req_data; __u64 resp_data; - /* firmware error code on failure (see psp-sev.h) */ - __u64 fw_err; + /* bits[63:32]: VMM error code, bits[31:0] firmware error code (see psp-sev.h) */ + union { + __u64 exitinfo2; + struct { + __u32 fw_error; + __u32 vmm_error; + }; + }; }; 2.1 SNP_GET_REPORT diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h index b63be696b776..0759af9b1acf 100644 --- a/arch/x86/include/asm/sev-common.h +++ b/arch/x86/include/asm/sev-common.h @@ -128,10 +128,6 @@ struct snp_psc_desc { struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY]; } __packed; -/* Guest message request error codes */ -#define SNP_GUEST_REQ_INVALID_LEN BIT_ULL(32) -#define SNP_GUEST_REQ_ERR_BUSY BIT_ULL(33) - #define GHCB_MSR_TERM_REQ 0x100 #define GHCB_MSR_TERM_REASON_SET_POS 12 #define GHCB_MSR_TERM_REASON_SET_MASK 0xf diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index ebc271bb6d8e..13dc2a9d23c1 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -9,6 +9,8 @@ #define __ASM_ENCRYPTED_STATE_H #include +#include + #include #include #include @@ -185,6 +187,9 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) return rc; } + +struct snp_guest_request_ioctl; + void setup_ghcb(void); void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned int npages); @@ -196,7 +201,7 @@ void snp_set_memory_private(unsigned long vaddr, unsigned int npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); -int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err); +int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } @@ -216,8 +221,7 @@ static inline void snp_set_memory_private(unsigned long vaddr, unsigned int npag static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } -static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, - unsigned long *fw_err) +static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { return -ENOTTY; } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 3f664ab277c4..b031244d6d2d 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -2175,7 +2177,7 @@ static int __init init_sev_config(char *str) } __setup("sev=", init_sev_config); -int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned long *fw_err) +int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { struct ghcb_state state; struct es_em_ctxt ctxt; @@ -2183,8 +2185,7 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned struct ghcb *ghcb; int ret; - if (!fw_err) - return -EINVAL; + rio->exitinfo2 = SEV_RET_NO_FW_CALL; /* * __sev_get_ghcb() needs to run with IRQs disabled because it is using @@ -2209,16 +2210,16 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned if (ret) goto e_put; - *fw_err = ghcb->save.sw_exit_info_2; - switch (*fw_err) { + rio->exitinfo2 = ghcb->save.sw_exit_info_2; + switch (rio->exitinfo2) { case 0: break; - case SNP_GUEST_REQ_ERR_BUSY: + case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY): ret = -EAGAIN; break; - case SNP_GUEST_REQ_INVALID_LEN: + case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN): /* Number of expected pages are returned in RBX */ if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { input->data_npages = ghcb_get_rbx(ghcb); diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 0c7b47acba2a..97dbe715e96a 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -332,11 +332,12 @@ static int enc_payload(struct snp_guest_dev *snp_dev, u64 seqno, int version, u8 return __enc_payload(snp_dev, req, payload, sz); } -static int __handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, __u64 *fw_err) +static int __handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, + struct snp_guest_request_ioctl *rio) { - unsigned long err = 0xff, override_err = 0; unsigned long req_start = jiffies; unsigned int override_npages = 0; + u64 override_err = 0; int rc; retry_request: @@ -346,7 +347,7 @@ retry_request: * sequence number must be incremented or the VMPCK must be deleted to * prevent reuse of the IV. */ - rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err); + rc = snp_issue_guest_request(exit_code, &snp_dev->input, rio); switch (rc) { case -ENOSPC: /* @@ -364,7 +365,7 @@ retry_request: * request buffer size was too small and give the caller the * required buffer size. */ - override_err = SNP_GUEST_REQ_INVALID_LEN; + override_err = SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN); /* * If this call to the firmware succeeds, the sequence number can @@ -377,7 +378,7 @@ retry_request: goto retry_request; /* - * The host may return SNP_GUEST_REQ_ERR_EBUSY if the request has been + * The host may return SNP_GUEST_VMM_ERR_BUSY if the request has been * throttled. Retry in the driver to avoid returning and reusing the * message sequence number on a different message. */ @@ -398,27 +399,29 @@ retry_request: */ snp_inc_msg_seqno(snp_dev); - if (fw_err) - *fw_err = override_err ?: err; + if (override_err) { + rio->exitinfo2 = override_err; + + /* + * If an extended guest request was issued and the supplied certificate + * buffer was not large enough, a standard guest request was issued to + * prevent IV reuse. If the standard request was successful, return -EIO + * back to the caller as would have originally been returned. + */ + if (!rc && override_err == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) + rc = -EIO; + } if (override_npages) snp_dev->input.data_npages = override_npages; - /* - * If an extended guest request was issued and the supplied certificate - * buffer was not large enough, a standard guest request was issued to - * prevent IV reuse. If the standard request was successful, return -EIO - * back to the caller as would have originally been returned. - */ - if (!rc && override_err == SNP_GUEST_REQ_INVALID_LEN) - return -EIO; - return rc; } -static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, int msg_ver, - u8 type, void *req_buf, size_t req_sz, void *resp_buf, - u32 resp_sz, __u64 *fw_err) +static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, + struct snp_guest_request_ioctl *rio, u8 type, + void *req_buf, size_t req_sz, void *resp_buf, + u32 resp_sz) { u64 seqno; int rc; @@ -432,7 +435,7 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in memset(snp_dev->response, 0, sizeof(struct snp_guest_msg)); /* Encrypt the userspace provided payload in snp_dev->secret_request. */ - rc = enc_payload(snp_dev, seqno, msg_ver, type, req_buf, req_sz); + rc = enc_payload(snp_dev, seqno, rio->msg_version, type, req_buf, req_sz); if (rc) return rc; @@ -443,12 +446,16 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in memcpy(snp_dev->request, &snp_dev->secret_request, sizeof(snp_dev->secret_request)); - rc = __handle_guest_request(snp_dev, exit_code, fw_err); + rc = __handle_guest_request(snp_dev, exit_code, rio); if (rc) { - if (rc == -EIO && *fw_err == SNP_GUEST_REQ_INVALID_LEN) + if (rc == -EIO && + rio->exitinfo2 == SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN)) return rc; - dev_alert(snp_dev->dev, "Detected error from ASP request. rc: %d, fw_err: %llu\n", rc, *fw_err); + dev_alert(snp_dev->dev, + "Detected error from ASP request. rc: %d, exitinfo2: 0x%llx\n", + rc, rio->exitinfo2); + snp_disable_vmpck(snp_dev); return rc; } @@ -488,9 +495,9 @@ static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_io if (!resp) return -ENOMEM; - rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version, + rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg, SNP_MSG_REPORT_REQ, &req, sizeof(req), resp->data, - resp_len, &arg->fw_err); + resp_len); if (rc) goto e_free; @@ -528,9 +535,8 @@ static int get_derived_key(struct snp_guest_dev *snp_dev, struct snp_guest_reque if (copy_from_user(&req, (void __user *)arg->req_data, sizeof(req))) return -EFAULT; - rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg->msg_version, - SNP_MSG_KEY_REQ, &req, sizeof(req), buf, resp_len, - &arg->fw_err); + rc = handle_guest_request(snp_dev, SVM_VMGEXIT_GUEST_REQUEST, arg, + SNP_MSG_KEY_REQ, &req, sizeof(req), buf, resp_len); if (rc) return rc; @@ -590,12 +596,12 @@ cmd: return -ENOMEM; snp_dev->input.data_npages = npages; - ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg->msg_version, + ret = handle_guest_request(snp_dev, SVM_VMGEXIT_EXT_GUEST_REQUEST, arg, SNP_MSG_REPORT_REQ, &req.data, - sizeof(req.data), resp->data, resp_len, &arg->fw_err); + sizeof(req.data), resp->data, resp_len); /* If certs length is invalid then copy the returned length */ - if (arg->fw_err == SNP_GUEST_REQ_INVALID_LEN) { + if (arg->vmm_error == SNP_GUEST_VMM_ERR_INVALID_LEN) { req.certs_len = snp_dev->input.data_npages << PAGE_SHIFT; if (copy_to_user((void __user *)arg->req_data, &req, sizeof(req))) @@ -630,7 +636,7 @@ static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long if (copy_from_user(&input, argp, sizeof(input))) return -EFAULT; - input.fw_err = 0xff; + input.exitinfo2 = 0xff; /* Message version must be non-zero */ if (!input.msg_version) @@ -661,7 +667,7 @@ static long snp_guest_ioctl(struct file *file, unsigned int ioctl, unsigned long mutex_unlock(&snp_cmd_mutex); - if (input.fw_err && copy_to_user(argp, &input, sizeof(input))) + if (input.exitinfo2 && copy_to_user(argp, &input, sizeof(input))) return -EFAULT; return ret; diff --git a/include/uapi/linux/sev-guest.h b/include/uapi/linux/sev-guest.h index 256aaeff7e65..2aa39112cf8d 100644 --- a/include/uapi/linux/sev-guest.h +++ b/include/uapi/linux/sev-guest.h @@ -52,8 +52,14 @@ struct snp_guest_request_ioctl { __u64 req_data; __u64 resp_data; - /* firmware error code on failure (see psp-sev.h) */ - __u64 fw_err; + /* bits[63:32]: VMM error code, bits[31:0] firmware error code (see psp-sev.h) */ + union { + __u64 exitinfo2; + struct { + __u32 fw_error; + __u32 vmm_error; + }; + }; }; struct snp_ext_report_req { @@ -77,4 +83,12 @@ struct snp_ext_report_req { /* Get SNP extended report as defined in the GHCB specification version 2. */ #define SNP_GET_EXT_REPORT _IOWR(SNP_GUEST_REQ_IOC_TYPE, 0x2, struct snp_guest_request_ioctl) +/* Guest message request EXIT_INFO_2 constants */ +#define SNP_GUEST_FW_ERR_MASK GENMASK_ULL(31, 0) +#define SNP_GUEST_VMM_ERR_SHIFT 32 +#define SNP_GUEST_VMM_ERR(x) (((u64)x) << SNP_GUEST_VMM_ERR_SHIFT) + +#define SNP_GUEST_VMM_ERR_INVALID_LEN 1 +#define SNP_GUEST_VMM_ERR_BUSY 2 + #endif /* __UAPI_LINUX_SEV_GUEST_H_ */ From 4e89780a4c4b303c84c53f7ed914cdf2ebc0aa30 Mon Sep 17 00:00:00 2001 From: Deepak R Varma Date: Thu, 2 Mar 2023 00:43:31 +0530 Subject: [PATCH 17/48] EDAC/altera: Remove redundant error logging A call to platform_get_irq() already prints an error on failure within its own implementation. So printing another error based on its return value in the caller is redundant and should be removed. The clean up also makes if condition block braces unnecessary. Remove that as well. Issue identified using platform_get_irq.cocci coccinelle semantic patch. Signed-off-by: Deepak R Varma Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/Y/+j27kqdhflPtaj@ubun2204.myguest.virtualbox.org --- drivers/edac/altera_edac.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index e7e8e624a436..72fa49c44360 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -2149,10 +2149,8 @@ static int altr_edac_a10_probe(struct platform_device *pdev) } edac->sb_irq = platform_get_irq(pdev, 0); - if (edac->sb_irq < 0) { - dev_err(&pdev->dev, "No SBERR IRQ resource\n"); + if (edac->sb_irq < 0) return edac->sb_irq; - } irq_set_chained_handler_and_data(edac->sb_irq, altr_edac_a10_irq_handler, @@ -2184,10 +2182,9 @@ static int altr_edac_a10_probe(struct platform_device *pdev) } #else edac->db_irq = platform_get_irq(pdev, 1); - if (edac->db_irq < 0) { - dev_err(&pdev->dev, "No DBERR IRQ resource\n"); + if (edac->db_irq < 0) return edac->db_irq; - } + irq_set_chained_handler_and_data(edac->db_irq, altr_edac_a10_irq_handler, edac); #endif From 22767544e9763e82acb60c233051bc426381b61c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 20 Mar 2023 22:20:12 +0100 Subject: [PATCH 18/48] x86/ACPI/boot: Improve __acpi_acquire_global_lock Improve __acpi_acquire_global_lock by using a temporary variable. This enables compiler to perform if-conversion and improves generated code from: ... 72a: d1 ea shr %edx 72c: 83 e1 fc and $0xfffffffc,%ecx 72f: 83 e2 01 and $0x1,%edx 732: 09 ca or %ecx,%edx 734: 83 c2 02 add $0x2,%edx 737: f0 0f b1 17 lock cmpxchg %edx,(%rdi) 73b: 75 e9 jne 726 <__acpi_acquire_global_lock+0x6> 73d: 83 e2 03 and $0x3,%edx 740: 31 c0 xor %eax,%eax 742: 83 fa 03 cmp $0x3,%edx 745: 0f 95 c0 setne %al 748: f7 d8 neg %eax to: ... 72a: d1 e9 shr %ecx 72c: 83 e2 fc and $0xfffffffc,%edx 72f: 83 e1 01 and $0x1,%ecx 732: 09 ca or %ecx,%edx 734: 83 c2 02 add $0x2,%edx 737: f0 0f b1 17 lock cmpxchg %edx,(%rdi) 73b: 75 e9 jne 726 <__acpi_acquire_global_lock+0x6> 73d: 8d 41 ff lea -0x1(%rcx),%eax BTW: the compiler could generate: lea 0x2(%rcx,%rdx,1),%edx instead of: or %ecx,%edx add $0x2,%edx but unwated conversion from add to or when bits are known to be zero prevents this improvement. This is GCC PR108477. No functional change intended. Signed-off-by: Uros Bizjak Signed-off-by: Dave Hansen Acked-by: Rafael J. Wysocki Link: https://lore.kernel.org/all/20230320212012.12704-1-ubizjak%40gmail.com --- arch/x86/kernel/acpi/boot.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 1c38174b5f01..a08a4a7a03f8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1853,13 +1853,18 @@ early_param("acpi_sci", setup_acpi_sci); int __acpi_acquire_global_lock(unsigned int *lock) { - unsigned int old, new; + unsigned int old, new, val; old = READ_ONCE(*lock); do { - new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); + val = (old >> 1) & 0x1; + new = (old & ~0x3) + 2 + val; } while (!try_cmpxchg(lock, &old, new)); - return ((new & 0x3) < 3) ? -1 : 0; + + if (val) + return 0; + + return -1; } int __acpi_release_global_lock(unsigned int *lock) From 5b6cb45072c03f0e0f0eff2dce2fe683cbbb9bf6 Mon Sep 17 00:00:00 2001 From: Jongwoo Han Date: Thu, 2 Mar 2023 11:11:20 +0900 Subject: [PATCH 19/48] EDAC/i5100: Fix typo in comment Correct typo from 'preform' to 'perform' in comment. Signed-off-by: Jongwoo Han Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230302021120.56794-1-jongwooo.han@gmail.com --- drivers/edac/i5100_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index f5d82518c15e..8db680b6ae9b 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -909,7 +909,7 @@ static void i5100_do_inject(struct mem_ctl_info *mci) * * The injection code don't work without setting this register. * The register needs to be flipped off then on else the hardware - * will only preform the first injection. + * will only perform the first injection. * * Stop condition bits 7:4 * 1010 - Stop after one injection From 00e4feb8c0476bbde3c8bf4a593e0f82ca9a4df6 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 27 Jan 2023 17:04:03 +0000 Subject: [PATCH 20/48] EDAC/amd64: Rename debug_display_dimm_sizes() Use the "dct" and "umc" prefixes for legacy and modern versions respectively. Also, move the "dct" version to avoid a forward declaration, and fixup some checkpatch warnings in the process. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-7-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 128 +++++++++++++++++++------------------- 1 file changed, 63 insertions(+), 65 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 5b42533f306a..31166f3b391e 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1291,7 +1291,65 @@ static unsigned long determine_edac_cap(struct amd64_pvt *pvt) return edac_cap; } -static void debug_display_dimm_sizes(struct amd64_pvt *, u8); +/* + * debug routine to display the memory sizes of all logical DIMMs and its + * CSROWs + */ +static void dct_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) +{ + u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; + u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0; + int dimm, size0, size1; + + if (pvt->fam == 0xf) { + /* K8 families < revF not supported yet */ + if (pvt->ext_model < K8_REV_F) + return; + + WARN_ON(ctrl != 0); + } + + if (pvt->fam == 0x10) { + dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1 + : pvt->dbam0; + dcsb = (ctrl && !dct_ganging_enabled(pvt)) ? + pvt->csels[1].csbases : + pvt->csels[0].csbases; + } else if (ctrl) { + dbam = pvt->dbam0; + dcsb = pvt->csels[1].csbases; + } + edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n", + ctrl, dbam); + + edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl); + + /* Dump memory sizes for DIMM and its CSROWs */ + for (dimm = 0; dimm < 4; dimm++) { + size0 = 0; + if (dcsb[dimm * 2] & DCSB_CS_ENABLE) + /* + * For F15m60h, we need multiplier for LRDIMM cs_size + * calculation. We pass dimm value to the dbam_to_cs + * mapper so we can find the multiplier from the + * corresponding DCSM. + */ + size0 = pvt->ops->dbam_to_cs(pvt, ctrl, + DBAM_DIMM(dimm, dbam), + dimm); + + size1 = 0; + if (dcsb[dimm * 2 + 1] & DCSB_CS_ENABLE) + size1 = pvt->ops->dbam_to_cs(pvt, ctrl, + DBAM_DIMM(dimm, dbam), + dimm); + + amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", + dimm * 2, size0, + dimm * 2 + 1, size1); + } +} + static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) { @@ -1366,7 +1424,7 @@ static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) return cs_mode; } -static void debug_display_dimm_sizes_df(struct amd64_pvt *pvt, u8 ctrl) +static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) { int dimm, size0, size1, cs0, cs1, cs_mode; @@ -1426,7 +1484,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) i, 1 << ((tmp >> 4) & 0x3)); } - debug_display_dimm_sizes_df(pvt, i); + umc_debug_display_dimm_sizes(pvt, i); } } @@ -1451,13 +1509,13 @@ static void __dump_misc_regs(struct amd64_pvt *pvt) (pvt->fam == 0xf) ? k8_dhar_offset(pvt) : f10_dhar_offset(pvt)); - debug_display_dimm_sizes(pvt, 0); + dct_debug_display_dimm_sizes(pvt, 0); /* everything below this point is Fam10h and above */ if (pvt->fam == 0xf) return; - debug_display_dimm_sizes(pvt, 1); + dct_debug_display_dimm_sizes(pvt, 1); /* Only if NOT ganged does dclr1 have valid info */ if (!dct_ganging_enabled(pvt)) @@ -2681,66 +2739,6 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome); } -/* - * debug routine to display the memory sizes of all logical DIMMs and its - * CSROWs - */ -static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) -{ - int dimm, size0, size1; - u32 *dcsb = ctrl ? pvt->csels[1].csbases : pvt->csels[0].csbases; - u32 dbam = ctrl ? pvt->dbam1 : pvt->dbam0; - - if (pvt->fam == 0xf) { - /* K8 families < revF not supported yet */ - if (pvt->ext_model < K8_REV_F) - return; - else - WARN_ON(ctrl != 0); - } - - if (pvt->fam == 0x10) { - dbam = (ctrl && !dct_ganging_enabled(pvt)) ? pvt->dbam1 - : pvt->dbam0; - dcsb = (ctrl && !dct_ganging_enabled(pvt)) ? - pvt->csels[1].csbases : - pvt->csels[0].csbases; - } else if (ctrl) { - dbam = pvt->dbam0; - dcsb = pvt->csels[1].csbases; - } - edac_dbg(1, "F2x%d80 (DRAM Bank Address Mapping): 0x%08x\n", - ctrl, dbam); - - edac_printk(KERN_DEBUG, EDAC_MC, "DCT%d chip selects:\n", ctrl); - - /* Dump memory sizes for DIMM and its CSROWs */ - for (dimm = 0; dimm < 4; dimm++) { - - size0 = 0; - if (dcsb[dimm*2] & DCSB_CS_ENABLE) - /* - * For F15m60h, we need multiplier for LRDIMM cs_size - * calculation. We pass dimm value to the dbam_to_cs - * mapper so we can find the multiplier from the - * corresponding DCSM. - */ - size0 = pvt->ops->dbam_to_cs(pvt, ctrl, - DBAM_DIMM(dimm, dbam), - dimm); - - size1 = 0; - if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE) - size1 = pvt->ops->dbam_to_cs(pvt, ctrl, - DBAM_DIMM(dimm, dbam), - dimm); - - amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", - dimm * 2, size0, - dimm * 2 + 1, size1); - } -} - static struct amd64_family_type family_types[] = { [K8_CPUS] = { .ctl_name = "K8", From c0984666fde9cf7da4c1123443b469f44e94dd84 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 27 Jan 2023 17:04:04 +0000 Subject: [PATCH 21/48] EDAC/amd64: Split get_csrow_nr_pages() into dct/umc functions Split get_csrow_nr_pages() into a legacy and modern versions in preparation for further legacy/modern refactoring. Also, rename f17_get_cs_mode() to match the new convention. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-8-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 40 +++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 31166f3b391e..77876b58242f 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1392,7 +1392,7 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) #define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY) #define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY) -static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) +static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) { u8 base, count = 0; int cs_mode = 0; @@ -1434,7 +1434,7 @@ static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) cs0 = dimm * 2; cs1 = dimm * 2 + 1; - cs_mode = f17_get_cs_mode(dimm, ctrl, pvt); + cs_mode = umc_get_cs_mode(dimm, ctrl, pvt); size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0); size1 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs1); @@ -3389,24 +3389,36 @@ skip: * encompasses * */ -static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) +static u32 dct_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) { u32 dbam = dct ? pvt->dbam1 : pvt->dbam0; - int csrow_nr = csrow_nr_orig; u32 cs_mode, nr_pages; - if (!pvt->umc) { - csrow_nr >>= 1; - cs_mode = DBAM_DIMM(csrow_nr, dbam); - } else { - cs_mode = f17_get_cs_mode(csrow_nr >> 1, dct, pvt); - } + csrow_nr >>= 1; + cs_mode = DBAM_DIMM(csrow_nr, dbam); nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr); nr_pages <<= 20 - PAGE_SHIFT; edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n", - csrow_nr_orig, dct, cs_mode); + csrow_nr, dct, cs_mode); + edac_dbg(0, "nr_pages/channel: %u\n", nr_pages); + + return nr_pages; +} + +static u32 umc_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_orig) +{ + int csrow_nr = csrow_nr_orig; + u32 cs_mode, nr_pages; + + cs_mode = umc_get_cs_mode(csrow_nr >> 1, dct, pvt); + + nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr); + nr_pages <<= 20 - PAGE_SHIFT; + + edac_dbg(0, "csrow: %d, channel: %d, cs_mode %d\n", + csrow_nr_orig, dct, cs_mode); edac_dbg(0, "nr_pages/channel: %u\n", nr_pages); return nr_pages; @@ -3445,7 +3457,7 @@ static int init_csrows_df(struct mem_ctl_info *mci) edac_dbg(1, "MC node: %d, csrow: %d\n", pvt->mc_node_id, cs); - dimm->nr_pages = get_csrow_nr_pages(pvt, umc, cs); + dimm->nr_pages = umc_get_csrow_nr_pages(pvt, umc, cs); dimm->mtype = pvt->umc[umc].dram_type; dimm->edac_mode = edac_mode; dimm->dtype = dev_type; @@ -3501,13 +3513,13 @@ static int init_csrows(struct mem_ctl_info *mci) pvt->mc_node_id, i); if (row_dct0) { - nr_pages = get_csrow_nr_pages(pvt, 0, i); + nr_pages = dct_get_csrow_nr_pages(pvt, 0, i); csrow->channels[0]->dimm->nr_pages = nr_pages; } /* K8 has only one DCT */ if (pvt->fam != 0xf && row_dct1) { - int row_dct1_pages = get_csrow_nr_pages(pvt, 1, i); + int row_dct1_pages = dct_get_csrow_nr_pages(pvt, 1, i); csrow->channels[1]->dimm->nr_pages = row_dct1_pages; nr_pages += row_dct1_pages; From a2e59ab8e93301466cb3ff3f2b7b65eb1a4e8786 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 27 Jan 2023 17:04:05 +0000 Subject: [PATCH 22/48] EDAC/amd64: Drop dbam_to_cs() for Family 17h and later The same function is used to calculate chip select size for all Zen-based family/models. Therefore, a family/model function pointer is not necessary. Drop the dbam_to_cs() function pointer for Family 17h and later systems. Also, move the Family 17h function to avoid a forward declaration. Rename it to indicate that the UMC Address Mask is used rather than the legacy DBAM value. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-9-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 186 +++++++++++++++++--------------------- 1 file changed, 81 insertions(+), 105 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 77876b58242f..652ed7185659 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1424,6 +1424,84 @@ static int umc_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) return cs_mode; } +static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, + unsigned int cs_mode, int csrow_nr) +{ + u32 addr_mask_orig, addr_mask_deinterleaved; + u32 msb, weight, num_zero_bits; + int cs_mask_nr = csrow_nr; + int dimm, size = 0; + + /* No Chip Selects are enabled. */ + if (!cs_mode) + return size; + + /* Requested size of an even CS but none are enabled. */ + if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1)) + return size; + + /* Requested size of an odd CS but none are enabled. */ + if (!(cs_mode & CS_ODD) && (csrow_nr & 1)) + return size; + + /* + * Family 17h introduced systems with one mask per DIMM, + * and two Chip Selects per DIMM. + * + * CS0 and CS1 -> MASK0 / DIMM0 + * CS2 and CS3 -> MASK1 / DIMM1 + * + * Family 19h Model 10h introduced systems with one mask per Chip Select, + * and two Chip Selects per DIMM. + * + * CS0 -> MASK0 -> DIMM0 + * CS1 -> MASK1 -> DIMM0 + * CS2 -> MASK2 -> DIMM1 + * CS3 -> MASK3 -> DIMM1 + * + * Keep the mask number equal to the Chip Select number for newer systems, + * and shift the mask number for older systems. + */ + dimm = csrow_nr >> 1; + + if (!fam_type->flags.zn_regs_v2) + cs_mask_nr >>= 1; + + /* Asymmetric dual-rank DIMM support. */ + if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) + addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr]; + else + addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr]; + + /* + * The number of zero bits in the mask is equal to the number of bits + * in a full mask minus the number of bits in the current mask. + * + * The MSB is the number of bits in the full mask because BIT[0] is + * always 0. + * + * In the special 3 Rank interleaving case, a single bit is flipped + * without swapping with the most significant bit. This can be handled + * by keeping the MSB where it is and ignoring the single zero bit. + */ + msb = fls(addr_mask_orig) - 1; + weight = hweight_long(addr_mask_orig); + num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE); + + /* Take the number of zero bits off from the top of the mask. */ + addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); + + edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm); + edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig); + edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved); + + /* Register [31:1] = Address [39:9]. Size is in kBs here. */ + size = (addr_mask_deinterleaved >> 2) + 1; + + /* Return size in MBs. */ + return size >> 10; +} + static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) { int dimm, size0, size1, cs0, cs1, cs_mode; @@ -1436,8 +1514,8 @@ static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) cs_mode = umc_get_cs_mode(dimm, ctrl, pvt); - size0 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs0); - size1 = pvt->ops->dbam_to_cs(pvt, ctrl, cs_mode, cs1); + size0 = umc_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs0); + size1 = umc_addr_mask_to_cs_size(pvt, ctrl, cs_mode, cs1); amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", cs0, size0, @@ -2139,84 +2217,6 @@ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, return ddr3_cs_size(cs_mode, false); } -static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, - unsigned int cs_mode, int csrow_nr) -{ - u32 addr_mask_orig, addr_mask_deinterleaved; - u32 msb, weight, num_zero_bits; - int cs_mask_nr = csrow_nr; - int dimm, size = 0; - - /* No Chip Selects are enabled. */ - if (!cs_mode) - return size; - - /* Requested size of an even CS but none are enabled. */ - if (!(cs_mode & CS_EVEN) && !(csrow_nr & 1)) - return size; - - /* Requested size of an odd CS but none are enabled. */ - if (!(cs_mode & CS_ODD) && (csrow_nr & 1)) - return size; - - /* - * Family 17h introduced systems with one mask per DIMM, - * and two Chip Selects per DIMM. - * - * CS0 and CS1 -> MASK0 / DIMM0 - * CS2 and CS3 -> MASK1 / DIMM1 - * - * Family 19h Model 10h introduced systems with one mask per Chip Select, - * and two Chip Selects per DIMM. - * - * CS0 -> MASK0 -> DIMM0 - * CS1 -> MASK1 -> DIMM0 - * CS2 -> MASK2 -> DIMM1 - * CS3 -> MASK3 -> DIMM1 - * - * Keep the mask number equal to the Chip Select number for newer systems, - * and shift the mask number for older systems. - */ - dimm = csrow_nr >> 1; - - if (!fam_type->flags.zn_regs_v2) - cs_mask_nr >>= 1; - - /* Asymmetric dual-rank DIMM support. */ - if ((csrow_nr & 1) && (cs_mode & CS_ODD_SECONDARY)) - addr_mask_orig = pvt->csels[umc].csmasks_sec[cs_mask_nr]; - else - addr_mask_orig = pvt->csels[umc].csmasks[cs_mask_nr]; - - /* - * The number of zero bits in the mask is equal to the number of bits - * in a full mask minus the number of bits in the current mask. - * - * The MSB is the number of bits in the full mask because BIT[0] is - * always 0. - * - * In the special 3 Rank interleaving case, a single bit is flipped - * without swapping with the most significant bit. This can be handled - * by keeping the MSB where it is and ignoring the single zero bit. - */ - msb = fls(addr_mask_orig) - 1; - weight = hweight_long(addr_mask_orig); - num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE); - - /* Take the number of zero bits off from the top of the mask. */ - addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); - - edac_dbg(1, "CS%d DIMM%d AddrMasks:\n", csrow_nr, dimm); - edac_dbg(1, " Original AddrMask: 0x%x\n", addr_mask_orig); - edac_dbg(1, " Deinterleaved AddrMask: 0x%x\n", addr_mask_deinterleaved); - - /* Register [31:1] = Address [39:9]. Size is in kBs here. */ - size = (addr_mask_deinterleaved >> 2) + 1; - - /* Return size in MBs. */ - return size >> 10; -} - static void read_dram_ctl_register(struct amd64_pvt *pvt) { @@ -2813,59 +2813,35 @@ static struct amd64_family_type family_types[] = { [F17_CPUS] = { .ctl_name = "F17h", .max_mcs = 2, - .ops = { - .dbam_to_cs = f17_addr_mask_to_cs_size, - } }, [F17_M10H_CPUS] = { .ctl_name = "F17h_M10h", .max_mcs = 2, - .ops = { - .dbam_to_cs = f17_addr_mask_to_cs_size, - } }, [F17_M30H_CPUS] = { .ctl_name = "F17h_M30h", .max_mcs = 8, - .ops = { - .dbam_to_cs = f17_addr_mask_to_cs_size, - } }, [F17_M60H_CPUS] = { .ctl_name = "F17h_M60h", .max_mcs = 2, - .ops = { - .dbam_to_cs = f17_addr_mask_to_cs_size, - } }, [F17_M70H_CPUS] = { .ctl_name = "F17h_M70h", .max_mcs = 2, - .ops = { - .dbam_to_cs = f17_addr_mask_to_cs_size, - } }, [F19_CPUS] = { .ctl_name = "F19h", .max_mcs = 8, - .ops = { - .dbam_to_cs = f17_addr_mask_to_cs_size, - } }, [F19_M10H_CPUS] = { .ctl_name = "F19h_M10h", .max_mcs = 12, .flags.zn_regs_v2 = 1, - .ops = { - .dbam_to_cs = f17_addr_mask_to_cs_size, - } }, [F19_M50H_CPUS] = { .ctl_name = "F19h_M50h", .max_mcs = 2, - .ops = { - .dbam_to_cs = f17_addr_mask_to_cs_size, - } }, }; @@ -3414,7 +3390,7 @@ static u32 umc_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_or cs_mode = umc_get_cs_mode(csrow_nr >> 1, dct, pvt); - nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, csrow_nr); + nr_pages = umc_addr_mask_to_cs_size(pvt, dct, cs_mode, csrow_nr); nr_pages <<= 20 - PAGE_SHIFT; edac_dbg(0, "csrow: %d, channel: %d, cs_mode %d\n", From 5a1adb375d7a86ee3b703f895b61de20773b6670 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 27 Jan 2023 17:04:06 +0000 Subject: [PATCH 23/48] EDAC/amd64: Do not discover ECC symbol size for Family 17h and later The ECC symbol size was needed on legacy system to lookup the ECC syndrome. This is not needed on modern systems because the ECC syndrome is explicitly provided in the MCA information. Remove the ECC symbol size discovery code for modern UMC-based systems. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-10-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 652ed7185659..bd0af17a318d 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1600,6 +1600,8 @@ static void __dump_misc_regs(struct amd64_pvt *pvt) debug_dump_dramcfg_low(pvt, pvt->dclr1, 1); edac_dbg(1, " DramHoleValid: %s\n", dhar_valid(pvt) ? "yes" : "no"); + + amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz); } /* Display and decode various NB registers for debug purposes. */ @@ -1609,8 +1611,6 @@ static void dump_misc_regs(struct amd64_pvt *pvt) __dump_misc_regs_df(pvt); else __dump_misc_regs(pvt); - - amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz); } /* @@ -3197,22 +3197,7 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt) { pvt->ecc_sym_sz = 4; - if (pvt->umc) { - u8 i; - - for_each_umc(i) { - /* Check enabled channels only: */ - if (pvt->umc[i].sdp_ctrl & UMC_SDP_INIT) { - if (pvt->umc[i].ecc_ctrl & BIT(9)) { - pvt->ecc_sym_sz = 16; - return; - } else if (pvt->umc[i].ecc_ctrl & BIT(7)) { - pvt->ecc_sym_sz = 8; - return; - } - } - } - } else if (pvt->fam >= 0x10) { + if (pvt->fam >= 0x10) { u32 tmp; amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); From ed623d55eef46d0ca7557b9665b4693931bfb507 Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Fri, 27 Jan 2023 17:04:07 +0000 Subject: [PATCH 24/48] EDAC/amd64: Merge struct amd64_family_type into struct amd64_pvt Future AMD systems will support heterogeneous "AMD Node" types, e.g. CPU and GPU types. Therefore, a global family type shared across all AMD nodes is no longer appropriate. Move struct low_ops routines and members of struct amd64_family_type to struct amd64_pvt. Currently, there are many code branches that split between "modern" and "legacy" systems. Another code branch will be needed in order to cover GPU cases. However, rather than introduce another branching case in multiple functions, the current branching code should be switched to a set of function pointers. This change makes the code more readable and simplifies adding support for new families/models. In order to reuse code, define two sets of function pointers. Use one for modern systems (Family 17h and later). This will not change between current CPU families. Use another set of function pointers for legacy systems (before Family 17h). Use the Family 16h versions as default for the legacy ops since these are the latest, and adjust the function pointers as needed for older families. [ Yazen: rebased/reworked patch and reworded commit message. ] [ bp: Fix rev8 or later check. ] Signed-off-by: Muralidhara M K Co-developed-by: Naveen Krishna Chatradhi Signed-off-by: Naveen Krishna Chatradhi Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-11-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 308 ++++++++++++++------------------------ drivers/edac/amd64_edac.h | 62 +++----- 2 files changed, 133 insertions(+), 237 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index bd0af17a318d..ac64d27387c8 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -13,11 +13,9 @@ module_param(ecc_enable_override, int, 0644); static struct msr __percpu *msrs; -static struct amd64_family_type *fam_type; - -static inline u32 get_umc_reg(u32 reg) +static inline u32 get_umc_reg(struct amd64_pvt *pvt, u32 reg) { - if (!fam_type->flags.zn_regs_v2) + if (!pvt->flags.zn_regs_v2) return reg; switch (reg) { @@ -437,7 +435,7 @@ static void get_cs_base_and_mask(struct amd64_pvt *pvt, int csrow, u8 dct, for (i = 0; i < pvt->csels[dct].m_cnt; i++) #define for_each_umc(i) \ - for (i = 0; i < fam_type->max_mcs; i++) + for (i = 0; i < pvt->max_mcs; i++) /* * @input_addr is an InputAddr associated with the node given by mci. Return the @@ -1464,7 +1462,7 @@ static int umc_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, */ dimm = csrow_nr >> 1; - if (!fam_type->flags.zn_regs_v2) + if (!pvt->flags.zn_regs_v2) cs_mask_nr >>= 1; /* Asymmetric dual-rank DIMM support. */ @@ -1556,7 +1554,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) if (umc->dram_type == MEM_LRDDR4 || umc->dram_type == MEM_LRDDR5) { amd_smn_read(pvt->mc_node_id, - umc_base + get_umc_reg(UMCCH_ADDR_CFG), + umc_base + get_umc_reg(pvt, UMCCH_ADDR_CFG), &tmp); edac_dbg(1, "UMC%d LRDIMM %dx rank multiply\n", i, 1 << ((tmp >> 4) & 0x3)); @@ -1629,7 +1627,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt) for_each_umc(umc) { pvt->csels[umc].b_cnt = 4; - pvt->csels[umc].m_cnt = fam_type->flags.zn_regs_v2 ? 4 : 2; + pvt->csels[umc].m_cnt = pvt->flags.zn_regs_v2 ? 4 : 2; } } else { @@ -1669,7 +1667,7 @@ static void read_umc_base_mask(struct amd64_pvt *pvt) } umc_mask_reg = get_umc_base(umc) + UMCCH_ADDR_MASK; - umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(UMCCH_ADDR_MASK_SEC); + umc_mask_reg_sec = get_umc_base(umc) + get_umc_reg(pvt, UMCCH_ADDR_MASK_SEC); for_each_chip_select_mask(cs, umc, pvt) { mask = &pvt->csels[umc].csmasks[cs]; @@ -1757,7 +1755,7 @@ static void determine_memory_type_df(struct amd64_pvt *pvt) * Check if the system supports the "DDR Type" field in UMC Config * and has DDR5 DIMMs in use. */ - if (fam_type->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) { + if (pvt->flags.zn_regs_v2 && ((umc->umc_cfg & GENMASK(2, 0)) == 0x1)) { if (umc->dimm_cfg & BIT(5)) umc->dram_type = MEM_LRDDR5; else if (umc->dimm_cfg & BIT(4)) @@ -2739,112 +2737,6 @@ static void f1x_map_sysaddr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr, err->channel = get_channel_from_ecc_syndrome(mci, err->syndrome); } -static struct amd64_family_type family_types[] = { - [K8_CPUS] = { - .ctl_name = "K8", - .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, - .f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, - .max_mcs = 2, - .ops = { - .map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow, - .dbam_to_cs = k8_dbam_to_chip_select, - } - }, - [F10_CPUS] = { - .ctl_name = "F10h", - .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP, - .f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM, - .max_mcs = 2, - .ops = { - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f10_dbam_to_chip_select, - } - }, - [F15_CPUS] = { - .ctl_name = "F15h", - .f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1, - .f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2, - .max_mcs = 2, - .ops = { - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f15_dbam_to_chip_select, - } - }, - [F15_M30H_CPUS] = { - .ctl_name = "F15h_M30h", - .f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1, - .f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2, - .max_mcs = 2, - .ops = { - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f16_dbam_to_chip_select, - } - }, - [F15_M60H_CPUS] = { - .ctl_name = "F15h_M60h", - .f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1, - .f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2, - .max_mcs = 2, - .ops = { - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f15_m60h_dbam_to_chip_select, - } - }, - [F16_CPUS] = { - .ctl_name = "F16h", - .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1, - .f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2, - .max_mcs = 2, - .ops = { - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f16_dbam_to_chip_select, - } - }, - [F16_M30H_CPUS] = { - .ctl_name = "F16h_M30h", - .f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1, - .f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2, - .max_mcs = 2, - .ops = { - .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, - .dbam_to_cs = f16_dbam_to_chip_select, - } - }, - [F17_CPUS] = { - .ctl_name = "F17h", - .max_mcs = 2, - }, - [F17_M10H_CPUS] = { - .ctl_name = "F17h_M10h", - .max_mcs = 2, - }, - [F17_M30H_CPUS] = { - .ctl_name = "F17h_M30h", - .max_mcs = 8, - }, - [F17_M60H_CPUS] = { - .ctl_name = "F17h_M60h", - .max_mcs = 2, - }, - [F17_M70H_CPUS] = { - .ctl_name = "F17h_M70h", - .max_mcs = 2, - }, - [F19_CPUS] = { - .ctl_name = "F19h", - .max_mcs = 8, - }, - [F19_M10H_CPUS] = { - .ctl_name = "F19h_M10h", - .max_mcs = 12, - .flags.zn_regs_v2 = 1, - }, - [F19_M50H_CPUS] = { - .ctl_name = "F19h_M50h", - .max_mcs = 2, - }, -}; - /* * These are tables of eigenvectors (one per line) which can be used for the * construction of the syndrome tables. The modified syndrome search algorithm @@ -3226,7 +3118,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) umc_base = get_umc_base(i); umc = &pvt->umc[i]; - amd_smn_read(nid, umc_base + get_umc_reg(UMCCH_DIMM_CFG), &umc->dimm_cfg); + amd_smn_read(nid, umc_base + get_umc_reg(pvt, UMCCH_DIMM_CFG), &umc->dimm_cfg); amd_smn_read(nid, umc_base + UMCCH_UMC_CFG, &umc->umc_cfg); amd_smn_read(nid, umc_base + UMCCH_SDP_CTRL, &umc->sdp_ctrl); amd_smn_read(nid, umc_base + UMCCH_ECC_CTRL, &umc->ecc_ctrl); @@ -3495,7 +3387,7 @@ static int init_csrows(struct mem_ctl_info *mci) : EDAC_SECDED; } - for (j = 0; j < fam_type->max_mcs; j++) { + for (j = 0; j < pvt->max_mcs; j++) { dimm = csrow->channels[j]->dimm; dimm->mtype = pvt->dram_type; dimm->edac_mode = edac_mode; @@ -3767,7 +3659,7 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci) mci->edac_cap = determine_edac_cap(pvt); mci->mod_name = EDAC_MOD_STR; - mci->ctl_name = fam_type->ctl_name; + mci->ctl_name = pvt->ctl_name; mci->dev_name = pci_name(pvt->F3); mci->ctl_page_to_phys = NULL; @@ -3779,114 +3671,145 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci) mci->get_sdram_scrub_rate = get_scrub_rate; } -/* - * returns a pointer to the family descriptor on success, NULL otherwise. - */ -static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) +static struct low_ops umc_ops = { +}; + +/* Use Family 16h versions for defaults and adjust as needed below. */ +static struct low_ops dct_ops = { + .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, + .dbam_to_cs = f16_dbam_to_chip_select, +}; + +static int per_family_init(struct amd64_pvt *pvt) { pvt->ext_model = boot_cpu_data.x86_model >> 4; pvt->stepping = boot_cpu_data.x86_stepping; pvt->model = boot_cpu_data.x86_model; pvt->fam = boot_cpu_data.x86; + pvt->max_mcs = 2; + + /* + * Decide on which ops group to use here and do any family/model + * overrides below. + */ + if (pvt->fam >= 0x17) + pvt->ops = &umc_ops; + else + pvt->ops = &dct_ops; switch (pvt->fam) { case 0xf: - fam_type = &family_types[K8_CPUS]; - pvt->ops = &family_types[K8_CPUS].ops; + pvt->ctl_name = (pvt->ext_model >= K8_REV_F) ? + "K8 revF or later" : "K8 revE or earlier"; + pvt->f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP; + pvt->f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL; + pvt->ops->map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow; + pvt->ops->dbam_to_cs = k8_dbam_to_chip_select; break; case 0x10: - fam_type = &family_types[F10_CPUS]; - pvt->ops = &family_types[F10_CPUS].ops; + pvt->ctl_name = "F10h"; + pvt->f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP; + pvt->f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM; + pvt->ops->dbam_to_cs = f10_dbam_to_chip_select; break; case 0x15: - if (pvt->model == 0x30) { - fam_type = &family_types[F15_M30H_CPUS]; - pvt->ops = &family_types[F15_M30H_CPUS].ops; + switch (pvt->model) { + case 0x30: + pvt->ctl_name = "F15h_M30h"; + pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2; break; - } else if (pvt->model == 0x60) { - fam_type = &family_types[F15_M60H_CPUS]; - pvt->ops = &family_types[F15_M60H_CPUS].ops; + case 0x60: + pvt->ctl_name = "F15h_M60h"; + pvt->f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2; + pvt->ops->dbam_to_cs = f15_m60h_dbam_to_chip_select; + break; + case 0x13: + /* Richland is only client */ + return -ENODEV; + default: + pvt->ctl_name = "F15h"; + pvt->f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2; + pvt->ops->dbam_to_cs = f15_dbam_to_chip_select; break; - /* Richland is only client */ - } else if (pvt->model == 0x13) { - return NULL; - } else { - fam_type = &family_types[F15_CPUS]; - pvt->ops = &family_types[F15_CPUS].ops; } break; case 0x16: - if (pvt->model == 0x30) { - fam_type = &family_types[F16_M30H_CPUS]; - pvt->ops = &family_types[F16_M30H_CPUS].ops; + switch (pvt->model) { + case 0x30: + pvt->ctl_name = "F16h_M30h"; + pvt->f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2; + break; + default: + pvt->ctl_name = "F16h"; + pvt->f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1; + pvt->f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2; break; } - fam_type = &family_types[F16_CPUS]; - pvt->ops = &family_types[F16_CPUS].ops; break; case 0x17: - if (pvt->model >= 0x10 && pvt->model <= 0x2f) { - fam_type = &family_types[F17_M10H_CPUS]; - pvt->ops = &family_types[F17_M10H_CPUS].ops; + switch (pvt->model) { + case 0x10 ... 0x2f: + pvt->ctl_name = "F17h_M10h"; break; - } else if (pvt->model >= 0x30 && pvt->model <= 0x3f) { - fam_type = &family_types[F17_M30H_CPUS]; - pvt->ops = &family_types[F17_M30H_CPUS].ops; + case 0x30 ... 0x3f: + pvt->ctl_name = "F17h_M30h"; + pvt->max_mcs = 8; break; - } else if (pvt->model >= 0x60 && pvt->model <= 0x6f) { - fam_type = &family_types[F17_M60H_CPUS]; - pvt->ops = &family_types[F17_M60H_CPUS].ops; + case 0x60 ... 0x6f: + pvt->ctl_name = "F17h_M60h"; break; - } else if (pvt->model >= 0x70 && pvt->model <= 0x7f) { - fam_type = &family_types[F17_M70H_CPUS]; - pvt->ops = &family_types[F17_M70H_CPUS].ops; + case 0x70 ... 0x7f: + pvt->ctl_name = "F17h_M70h"; + break; + default: + pvt->ctl_name = "F17h"; break; } - fallthrough; - case 0x18: - fam_type = &family_types[F17_CPUS]; - pvt->ops = &family_types[F17_CPUS].ops; + break; - if (pvt->fam == 0x18) - family_types[F17_CPUS].ctl_name = "F18h"; + case 0x18: + pvt->ctl_name = "F18h"; break; case 0x19: - if (pvt->model >= 0x10 && pvt->model <= 0x1f) { - fam_type = &family_types[F19_M10H_CPUS]; - pvt->ops = &family_types[F19_M10H_CPUS].ops; + switch (pvt->model) { + case 0x00 ... 0x0f: + pvt->ctl_name = "F19h"; + pvt->max_mcs = 8; break; - } else if (pvt->model >= 0x20 && pvt->model <= 0x2f) { - fam_type = &family_types[F17_M70H_CPUS]; - pvt->ops = &family_types[F17_M70H_CPUS].ops; - fam_type->ctl_name = "F19h_M20h"; + case 0x10 ... 0x1f: + pvt->ctl_name = "F19h_M10h"; + pvt->max_mcs = 12; + pvt->flags.zn_regs_v2 = 1; break; - } else if (pvt->model >= 0x50 && pvt->model <= 0x5f) { - fam_type = &family_types[F19_M50H_CPUS]; - pvt->ops = &family_types[F19_M50H_CPUS].ops; - fam_type->ctl_name = "F19h_M50h"; + case 0x20 ... 0x2f: + pvt->ctl_name = "F19h_M20h"; break; - } else if (pvt->model >= 0xa0 && pvt->model <= 0xaf) { - fam_type = &family_types[F19_M10H_CPUS]; - pvt->ops = &family_types[F19_M10H_CPUS].ops; - fam_type->ctl_name = "F19h_MA0h"; + case 0x50 ... 0x5f: + pvt->ctl_name = "F19h_M50h"; + break; + case 0xa0 ... 0xaf: + pvt->ctl_name = "F19h_MA0h"; + pvt->max_mcs = 12; + pvt->flags.zn_regs_v2 = 1; break; } - fam_type = &family_types[F19_CPUS]; - pvt->ops = &family_types[F19_CPUS].ops; - family_types[F19_CPUS].ctl_name = "F19h"; break; default: amd64_err("Unsupported family!\n"); - return NULL; + return -ENODEV; } - return fam_type; + return 0; } static const struct attribute_group *amd64_edac_attr_groups[] = { @@ -3903,12 +3826,12 @@ static int hw_info_get(struct amd64_pvt *pvt) int ret; if (pvt->fam >= 0x17) { - pvt->umc = kcalloc(fam_type->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); + pvt->umc = kcalloc(pvt->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); if (!pvt->umc) return -ENOMEM; } else { - pci_id1 = fam_type->f1_id; - pci_id2 = fam_type->f2_id; + pci_id1 = pvt->f1_id; + pci_id2 = pvt->f2_id; } ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2); @@ -3938,7 +3861,7 @@ static int init_one_instance(struct amd64_pvt *pvt) layers[0].size = pvt->csels[0].b_cnt; layers[0].is_virt_csrow = true; layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = fam_type->max_mcs; + layers[1].size = pvt->max_mcs; layers[1].is_virt_csrow = false; mci = edac_mc_alloc(pvt->mc_node_id, ARRAY_SIZE(layers), layers, 0); @@ -3968,7 +3891,7 @@ static bool instance_has_memory(struct amd64_pvt *pvt) bool cs_enabled = false; int cs = 0, dct = 0; - for (dct = 0; dct < fam_type->max_mcs; dct++) { + for (dct = 0; dct < pvt->max_mcs; dct++) { for_each_chip_select(cs, dct, pvt) cs_enabled |= csrow_enabled(cs, dct, pvt); } @@ -3997,9 +3920,8 @@ static int probe_one_instance(unsigned int nid) pvt->mc_node_id = nid; pvt->F3 = F3; - ret = -ENODEV; - fam_type = per_family_init(pvt); - if (!fam_type) + ret = per_family_init(pvt); + if (ret < 0) goto err_enable; ret = hw_info_get(pvt); @@ -4038,11 +3960,7 @@ static int probe_one_instance(unsigned int nid) goto err_enable; } - amd64_info("%s %sdetected (node %d).\n", fam_type->ctl_name, - (pvt->fam == 0xf ? - (pvt->ext_model >= K8_REV_F ? "revF or later " - : "revE or earlier ") - : ""), pvt->mc_node_id); + amd64_info("%s detected (node %d).\n", pvt->ctl_name, pvt->mc_node_id); dump_misc_regs(pvt); diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index e4329dff8cf2..8eea15546b9f 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -273,25 +273,6 @@ #define UMC_SDP_INIT BIT(31) -enum amd_families { - K8_CPUS = 0, - F10_CPUS, - F15_CPUS, - F15_M30H_CPUS, - F15_M60H_CPUS, - F16_CPUS, - F16_M30H_CPUS, - F17_CPUS, - F17_M10H_CPUS, - F17_M30H_CPUS, - F17_M60H_CPUS, - F17_M70H_CPUS, - F19_CPUS, - F19_M10H_CPUS, - F19_M50H_CPUS, - NUM_FAMILIES, -}; - /* Error injection control structure */ struct error_injection { u32 section; @@ -334,6 +315,16 @@ struct amd64_umc { enum mem_type dram_type; }; +struct amd64_family_flags { + /* + * Indicates that the system supports the new register offsets, etc. + * first introduced with Family 19h Model 10h. + */ + __u64 zn_regs_v2 : 1, + + __reserved : 63; +}; + struct amd64_pvt { struct low_ops *ops; @@ -375,6 +366,12 @@ struct amd64_pvt { /* x4, x8, or x16 syndromes in use */ u8 ecc_sym_sz; + const char *ctl_name; + u16 f1_id, f2_id; + /* Maximum number of memory controllers per die/node. */ + u8 max_mcs; + + struct amd64_family_flags flags; /* place to store error injection parameters prior to issue */ struct error_injection injection; @@ -465,29 +462,10 @@ struct ecc_settings { * functions and per device encoding/decoding logic. */ struct low_ops { - void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr, - struct err_info *); - int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode, int cs_mask_nr); -}; - -struct amd64_family_flags { - /* - * Indicates that the system supports the new register offsets, etc. - * first introduced with Family 19h Model 10h. - */ - __u64 zn_regs_v2 : 1, - - __reserved : 63; -}; - -struct amd64_family_type { - const char *ctl_name; - u16 f1_id, f2_id; - /* Maximum number of memory controllers per die/node. */ - u8 max_mcs; - struct amd64_family_flags flags; - struct low_ops ops; + void (*map_sysaddr_to_csrow)(struct mem_ctl_info *mci, u64 sys_addr, + struct err_info *err); + int (*dbam_to_cs)(struct amd64_pvt *pvt, u8 dct, + unsigned int cs_mode, int cs_mask_nr); }; int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, From 9a97a7f4d7b26cb61cdbb328b9d8982d94c99170 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 27 Jan 2023 17:04:08 +0000 Subject: [PATCH 25/48] EDAC/amd64: Rework hw_info_{get,put} The bulk of system-specific information is gathered at init time with hw_info_get(). This function calls a number of helper functions, and many of these helper functions are split between a modern UMC/DF path and a legacy DCT path. Split hw_info_get() into legacy and modern versions. This creates two separate code paths early on, and legacy and modern helper functions can be called directly in the appropriate code path. Also, simplify hw_info_put() and share it between legacy and modern systems. NULL pointer checks are done in pci_dev_put() and kfree(), so they can be called unconditionally. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-12-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 78 +++++++++++++++++---------------------- drivers/edac/amd64_edac.h | 1 + 2 files changed, 34 insertions(+), 45 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index ac64d27387c8..d2a9793c097e 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3045,9 +3045,6 @@ log_error: static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2) { - if (pvt->umc) - return 0; - /* Reserve the ADDRESS MAP Device */ pvt->F1 = pci_get_related_function(pvt->F3->vendor, pci_id1, pvt->F3); if (!pvt->F1) { @@ -3075,16 +3072,6 @@ reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 pci_id1, u16 pci_id2) return 0; } -static void free_mc_sibling_devs(struct amd64_pvt *pvt) -{ - if (pvt->umc) { - return; - } else { - pci_dev_put(pvt->F1); - pci_dev_put(pvt->F2); - } -} - static void determine_ecc_sym_sz(struct amd64_pvt *pvt) { pvt->ecc_sym_sz = 4; @@ -3671,13 +3658,45 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci) mci->get_sdram_scrub_rate = get_scrub_rate; } +static int dct_hw_info_get(struct amd64_pvt *pvt) +{ + int ret = reserve_mc_sibling_devs(pvt, pvt->f1_id, pvt->f2_id); + + if (ret) + return ret; + + read_mc_regs(pvt); + + return 0; +} + +static int umc_hw_info_get(struct amd64_pvt *pvt) +{ + pvt->umc = kcalloc(pvt->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); + if (!pvt->umc) + return -ENOMEM; + + read_mc_regs(pvt); + + return 0; +} + +static void hw_info_put(struct amd64_pvt *pvt) +{ + pci_dev_put(pvt->F1); + pci_dev_put(pvt->F2); + kfree(pvt->umc); +} + static struct low_ops umc_ops = { + .hw_info_get = umc_hw_info_get, }; /* Use Family 16h versions for defaults and adjust as needed below. */ static struct low_ops dct_ops = { .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, .dbam_to_cs = f16_dbam_to_chip_select, + .hw_info_get = dct_hw_info_get, }; static int per_family_init(struct amd64_pvt *pvt) @@ -3820,37 +3839,6 @@ static const struct attribute_group *amd64_edac_attr_groups[] = { NULL }; -static int hw_info_get(struct amd64_pvt *pvt) -{ - u16 pci_id1 = 0, pci_id2 = 0; - int ret; - - if (pvt->fam >= 0x17) { - pvt->umc = kcalloc(pvt->max_mcs, sizeof(struct amd64_umc), GFP_KERNEL); - if (!pvt->umc) - return -ENOMEM; - } else { - pci_id1 = pvt->f1_id; - pci_id2 = pvt->f2_id; - } - - ret = reserve_mc_sibling_devs(pvt, pci_id1, pci_id2); - if (ret) - return ret; - - read_mc_regs(pvt); - - return 0; -} - -static void hw_info_put(struct amd64_pvt *pvt) -{ - if (pvt->F1) - free_mc_sibling_devs(pvt); - - kfree(pvt->umc); -} - static int init_one_instance(struct amd64_pvt *pvt) { struct mem_ctl_info *mci = NULL; @@ -3924,7 +3912,7 @@ static int probe_one_instance(unsigned int nid) if (ret < 0) goto err_enable; - ret = hw_info_get(pvt); + ret = pvt->ops->hw_info_get(pvt); if (ret < 0) goto err_enable; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 8eea15546b9f..00b3f32e3cbb 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -466,6 +466,7 @@ struct low_ops { struct err_info *err); int (*dbam_to_cs)(struct amd64_pvt *pvt, u8 dct, unsigned int cs_mode, int cs_mask_nr); + int (*hw_info_get)(struct amd64_pvt *pvt); }; int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, From 637f60ef2cc7c015c2e065503f8da89272ab208d Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Fri, 27 Jan 2023 17:04:09 +0000 Subject: [PATCH 26/48] EDAC/amd64: Split prep_chip_selects() into dct/umc functions Call them from their respective hw_info_get() function. Avoid the need for family/model-based function pointers. Add the calls before reading hardware registers from the memory controllers, since the number of chip select bases and masks needs to be known first. [ Yazen: rebased/reworked patch and reworded commit message. ] Signed-off-by: Muralidhara M K Co-developed-by: Naveen Krishna Chatradhi Signed-off-by: Naveen Krishna Chatradhi Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-13-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index d2a9793c097e..0ba63c93f8f4 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1614,7 +1614,7 @@ static void dump_misc_regs(struct amd64_pvt *pvt) /* * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60] */ -static void prep_chip_selects(struct amd64_pvt *pvt) +static void dct_prep_chip_selects(struct amd64_pvt *pvt) { if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; @@ -1622,20 +1622,22 @@ static void prep_chip_selects(struct amd64_pvt *pvt) } else if (pvt->fam == 0x15 && pvt->model == 0x30) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; - } else if (pvt->fam >= 0x17) { - int umc; - - for_each_umc(umc) { - pvt->csels[umc].b_cnt = 4; - pvt->csels[umc].m_cnt = pvt->flags.zn_regs_v2 ? 4 : 2; - } - } else { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 4; } } +static void umc_prep_chip_selects(struct amd64_pvt *pvt) +{ + int umc; + + for_each_umc(umc) { + pvt->csels[umc].b_cnt = 4; + pvt->csels[umc].m_cnt = pvt->flags.zn_regs_v2 ? 4 : 2; + } +} + static void read_umc_base_mask(struct amd64_pvt *pvt) { u32 umc_base_reg, umc_base_reg_sec; @@ -1694,8 +1696,6 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) { int cs; - prep_chip_selects(pvt); - if (pvt->umc) return read_umc_base_mask(pvt); @@ -3665,6 +3665,7 @@ static int dct_hw_info_get(struct amd64_pvt *pvt) if (ret) return ret; + dct_prep_chip_selects(pvt); read_mc_regs(pvt); return 0; @@ -3676,6 +3677,7 @@ static int umc_hw_info_get(struct amd64_pvt *pvt) if (!pvt->umc) return -ENOMEM; + umc_prep_chip_selects(pvt); read_mc_regs(pvt); return 0; From b29dad9bf3ce13e7a37f7fc75ee2c95ac48b9f08 Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Fri, 27 Jan 2023 17:04:10 +0000 Subject: [PATCH 27/48] EDAC/amd64: Split read_base_mask() into dct/umc functions Call them from their respective hw_info_get() paths. Call the new functions after the setting the chip select base and mask counts, since those are need to read the correct number of chip select base and mask registers. And call the new functions before the remaining set up, because the base and mask register values will be needed later. [Yazen: Rebased/reworked patch and reworded commit message. ] Signed-off-by: Muralidhara M K Co-developed-by: Naveen Krishna Chatradhi Signed-off-by: Naveen Krishna Chatradhi Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-14-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 0ba63c93f8f4..0729ce10e483 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1638,7 +1638,7 @@ static void umc_prep_chip_selects(struct amd64_pvt *pvt) } } -static void read_umc_base_mask(struct amd64_pvt *pvt) +static void umc_read_base_mask(struct amd64_pvt *pvt) { u32 umc_base_reg, umc_base_reg_sec; u32 umc_mask_reg, umc_mask_reg_sec; @@ -1692,13 +1692,10 @@ static void read_umc_base_mask(struct amd64_pvt *pvt) /* * Function 2 Offset F10_DCSB0; read in the DCS Base and DCS Mask registers */ -static void read_dct_base_mask(struct amd64_pvt *pvt) +static void dct_read_base_mask(struct amd64_pvt *pvt) { int cs; - if (pvt->umc) - return read_umc_base_mask(pvt); - for_each_chip_select(cs, 0, pvt) { int reg0 = DCSB0 + (cs * 4); int reg1 = DCSB1 + (cs * 4); @@ -3185,7 +3182,6 @@ static void read_mc_regs(struct amd64_pvt *pvt) } skip: - read_dct_base_mask(pvt); determine_memory_type(pvt); @@ -3666,6 +3662,7 @@ static int dct_hw_info_get(struct amd64_pvt *pvt) return ret; dct_prep_chip_selects(pvt); + dct_read_base_mask(pvt); read_mc_regs(pvt); return 0; @@ -3678,6 +3675,7 @@ static int umc_hw_info_get(struct amd64_pvt *pvt) return -ENOMEM; umc_prep_chip_selects(pvt); + umc_read_base_mask(pvt); read_mc_regs(pvt); return 0; From 78ec161a91dda15ecdfa153196bf0a739f0cb9a9 Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Fri, 27 Jan 2023 17:04:11 +0000 Subject: [PATCH 28/48] EDAC/amd64: Split determine_memory_type() into dct/umc functions Call them from their respective hw_info_get() paths. Call them after all other hardware registers have been saved, since the memory type for a device will be determined based on the saved information. [ Yazen: rebased/reworked patch and reworded commit message. ] Signed-off-by: Muralidhara M K Co-developed-by: Naveen Krishna Chatradhi Signed-off-by: Naveen Krishna Chatradhi Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-15-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 0729ce10e483..375bc0275e8e 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1735,7 +1735,7 @@ static void dct_read_base_mask(struct amd64_pvt *pvt) } } -static void determine_memory_type_df(struct amd64_pvt *pvt) +static void umc_determine_memory_type(struct amd64_pvt *pvt) { struct amd64_umc *umc; u32 i; @@ -1772,13 +1772,10 @@ static void determine_memory_type_df(struct amd64_pvt *pvt) } } -static void determine_memory_type(struct amd64_pvt *pvt) +static void dct_determine_memory_type(struct amd64_pvt *pvt) { u32 dram_ctrl, dcsm; - if (pvt->umc) - return determine_memory_type_df(pvt); - switch (pvt->fam) { case 0xf: if (pvt->ext_model >= K8_REV_F) @@ -1828,6 +1825,8 @@ static void determine_memory_type(struct amd64_pvt *pvt) WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam); pvt->dram_type = MEM_EMPTY; } + + edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); return; ddr3: @@ -3183,10 +3182,6 @@ static void read_mc_regs(struct amd64_pvt *pvt) skip: - determine_memory_type(pvt); - - if (!pvt->umc) - edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); determine_ecc_sym_sz(pvt); } @@ -3664,6 +3659,7 @@ static int dct_hw_info_get(struct amd64_pvt *pvt) dct_prep_chip_selects(pvt); dct_read_base_mask(pvt); read_mc_regs(pvt); + dct_determine_memory_type(pvt); return 0; } @@ -3677,6 +3673,7 @@ static int umc_hw_info_get(struct amd64_pvt *pvt) umc_prep_chip_selects(pvt); umc_read_base_mask(pvt); read_mc_regs(pvt); + umc_determine_memory_type(pvt); return 0; } From 32ecdf8688d7b6ae183cce88c9307850ff6918f0 Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Fri, 27 Jan 2023 17:04:12 +0000 Subject: [PATCH 29/48] EDAC/amd64: Split read_mc_regs() into dct/umc functions Call them from their respective hw_info_get() paths. ECC symbol size is not needed on UMC systems, so determine_ecc_sym_sz() is left out of the UMC path. Do not save TOP_MEM* values on modern controllers because they're not needed there (read: they were used only for debugging, if anything). [ Yazen: rebased/reworked patch and reworded commit message. ] Signed-off-by: Muralidhara M K Co-developed-by: Naveen Krishna Chatradhi Signed-off-by: Naveen Krishna Chatradhi Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-16-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 375bc0275e8e..df95f3be6580 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3089,7 +3089,7 @@ static void determine_ecc_sym_sz(struct amd64_pvt *pvt) /* * Retrieve the hardware registers of the memory controller. */ -static void __read_mc_regs_df(struct amd64_pvt *pvt) +static void umc_read_mc_regs(struct amd64_pvt *pvt) { u8 nid = pvt->mc_node_id; struct amd64_umc *umc; @@ -3113,7 +3113,7 @@ static void __read_mc_regs_df(struct amd64_pvt *pvt) * Retrieve the hardware registers of the memory controller (this includes the * 'Address Map' and 'Misc' device regs) */ -static void read_mc_regs(struct amd64_pvt *pvt) +static void dct_read_mc_regs(struct amd64_pvt *pvt) { unsigned int range; u64 msr_val; @@ -3134,12 +3134,6 @@ static void read_mc_regs(struct amd64_pvt *pvt) edac_dbg(0, " TOP_MEM2 disabled\n"); } - if (pvt->umc) { - __read_mc_regs_df(pvt); - - goto skip; - } - amd64_read_pci_cfg(pvt->F3, NBCAP, &pvt->nbcap); read_dram_ctl_register(pvt); @@ -3180,9 +3174,6 @@ static void read_mc_regs(struct amd64_pvt *pvt) amd64_read_dct_pci_cfg(pvt, 1, DCHR0, &pvt->dchr1); } -skip: - - determine_ecc_sym_sz(pvt); } @@ -3658,7 +3649,7 @@ static int dct_hw_info_get(struct amd64_pvt *pvt) dct_prep_chip_selects(pvt); dct_read_base_mask(pvt); - read_mc_regs(pvt); + dct_read_mc_regs(pvt); dct_determine_memory_type(pvt); return 0; @@ -3672,7 +3663,7 @@ static int umc_hw_info_get(struct amd64_pvt *pvt) umc_prep_chip_selects(pvt); umc_read_base_mask(pvt); - read_mc_regs(pvt); + umc_read_mc_regs(pvt); umc_determine_memory_type(pvt); return 0; From eb2bcdfc3755342f6af5b9b8365f27c05e4bf275 Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Fri, 27 Jan 2023 17:04:13 +0000 Subject: [PATCH 30/48] EDAC/amd64: Split ecc_enabled() into dct/umc functions Call them using a function pointer in pvt->ops. The "ECC enabled" check is done outside of the hardware information gathering done in hw_info_get(). So a high-level function pointer is needed to separate the legacy and modern paths. No functional change is intended. [Yazen: rebased/reworked patch and reworded commit message. ] Signed-off-by: Muralidhara M K Co-developed-by: Naveen Krishna Chatradhi Signed-off-by: Naveen Krishna Chatradhi Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-17-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 81 ++++++++++++++++++++++----------------- drivers/edac/amd64_edac.h | 1 + 2 files changed, 46 insertions(+), 36 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index df95f3be6580..ef8f097cfe3d 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3527,48 +3527,21 @@ static void restore_ecc_error_reporting(struct ecc_settings *s, u16 nid, amd64_warn("Error restoring NB MCGCTL settings!\n"); } -static bool ecc_enabled(struct amd64_pvt *pvt) +static bool dct_ecc_enabled(struct amd64_pvt *pvt) { u16 nid = pvt->mc_node_id; bool nb_mce_en = false; - u8 ecc_en = 0, i; + u8 ecc_en = 0; u32 value; - if (boot_cpu_data.x86 >= 0x17) { - u8 umc_en_mask = 0, ecc_en_mask = 0; - struct amd64_umc *umc; + amd64_read_pci_cfg(pvt->F3, NBCFG, &value); - for_each_umc(i) { - umc = &pvt->umc[i]; + ecc_en = !!(value & NBCFG_ECC_ENABLE); - /* Only check enabled UMCs. */ - if (!(umc->sdp_ctrl & UMC_SDP_INIT)) - continue; - - umc_en_mask |= BIT(i); - - if (umc->umc_cap_hi & UMC_ECC_ENABLED) - ecc_en_mask |= BIT(i); - } - - /* Check whether at least one UMC is enabled: */ - if (umc_en_mask) - ecc_en = umc_en_mask == ecc_en_mask; - else - edac_dbg(0, "Node %d: No enabled UMCs.\n", nid); - - /* Assume UMC MCA banks are enabled. */ - nb_mce_en = true; - } else { - amd64_read_pci_cfg(pvt->F3, NBCFG, &value); - - ecc_en = !!(value & NBCFG_ECC_ENABLE); - - nb_mce_en = nb_mce_bank_enabled_on_node(nid); - if (!nb_mce_en) - edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n", - MSR_IA32_MCG_CTL, nid); - } + nb_mce_en = nb_mce_bank_enabled_on_node(nid); + if (!nb_mce_en) + edac_dbg(0, "NB MCE bank disabled, set MSR 0x%08x[4] on node %d to enable.\n", + MSR_IA32_MCG_CTL, nid); edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, (ecc_en ? "enabled" : "disabled")); @@ -3578,6 +3551,40 @@ static bool ecc_enabled(struct amd64_pvt *pvt) return true; } +static bool umc_ecc_enabled(struct amd64_pvt *pvt) +{ + u8 umc_en_mask = 0, ecc_en_mask = 0; + u16 nid = pvt->mc_node_id; + struct amd64_umc *umc; + u8 ecc_en = 0, i; + + for_each_umc(i) { + umc = &pvt->umc[i]; + + /* Only check enabled UMCs. */ + if (!(umc->sdp_ctrl & UMC_SDP_INIT)) + continue; + + umc_en_mask |= BIT(i); + + if (umc->umc_cap_hi & UMC_ECC_ENABLED) + ecc_en_mask |= BIT(i); + } + + /* Check whether at least one UMC is enabled: */ + if (umc_en_mask) + ecc_en = umc_en_mask == ecc_en_mask; + else + edac_dbg(0, "Node %d: No enabled UMCs.\n", nid); + + edac_dbg(3, "Node %d: DRAM ECC %s.\n", nid, (ecc_en ? "enabled" : "disabled")); + + if (!ecc_en) + return false; + else + return true; +} + static inline void f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) { @@ -3678,6 +3685,7 @@ static void hw_info_put(struct amd64_pvt *pvt) static struct low_ops umc_ops = { .hw_info_get = umc_hw_info_get, + .ecc_enabled = umc_ecc_enabled, }; /* Use Family 16h versions for defaults and adjust as needed below. */ @@ -3685,6 +3693,7 @@ static struct low_ops dct_ops = { .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, .dbam_to_cs = f16_dbam_to_chip_select, .hw_info_get = dct_hw_info_get, + .ecc_enabled = dct_ecc_enabled, }; static int per_family_init(struct amd64_pvt *pvt) @@ -3910,7 +3919,7 @@ static int probe_one_instance(unsigned int nid) goto err_enable; } - if (!ecc_enabled(pvt)) { + if (!pvt->ops->ecc_enabled(pvt)) { ret = -ENODEV; if (!ecc_enable_override) diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 00b3f32e3cbb..103cd38a6302 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -467,6 +467,7 @@ struct low_ops { int (*dbam_to_cs)(struct amd64_pvt *pvt, u8 dct, unsigned int cs_mode, int cs_mask_nr); int (*hw_info_get)(struct amd64_pvt *pvt); + bool (*ecc_enabled)(struct amd64_pvt *pvt); }; int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, From 0a42a37f65155f9aa9ca40a959ad7d64f6cdc37a Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Fri, 27 Jan 2023 17:04:14 +0000 Subject: [PATCH 31/48] EDAC/amd64: Split setup_mci_misc_attrs() into dct/umc functions The init_one_instance() path is shared between legacy and modern systems. So add the new functions to a function pointer in pvt->ops. No functional change is intended. [ Yazen: Rebased/reworked patch and reworded commit message. ] Signed-off-by: Muralidhara M K Co-developed-by: Naveen Krishna Chatradhi Signed-off-by: Naveen Krishna Chatradhi Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-18-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 37 ++++++++++++++++++++++++------------- drivers/edac/amd64_edac.h | 1 + 2 files changed, 25 insertions(+), 13 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index ef8f097cfe3d..261a6f662f2a 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3616,22 +3616,18 @@ f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) } } -static void setup_mci_misc_attrs(struct mem_ctl_info *mci) +static void dct_setup_mci_misc_attrs(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; mci->mtype_cap = MEM_FLAG_DDR2 | MEM_FLAG_RDDR2; mci->edac_ctl_cap = EDAC_FLAG_NONE; - if (pvt->umc) { - f17h_determine_edac_ctl_cap(mci, pvt); - } else { - if (pvt->nbcap & NBCAP_SECDED) - mci->edac_ctl_cap |= EDAC_FLAG_SECDED; + if (pvt->nbcap & NBCAP_SECDED) + mci->edac_ctl_cap |= EDAC_FLAG_SECDED; - if (pvt->nbcap & NBCAP_CHIPKILL) - mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; - } + if (pvt->nbcap & NBCAP_CHIPKILL) + mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; mci->edac_cap = determine_edac_cap(pvt); mci->mod_name = EDAC_MOD_STR; @@ -3639,14 +3635,27 @@ static void setup_mci_misc_attrs(struct mem_ctl_info *mci) mci->dev_name = pci_name(pvt->F3); mci->ctl_page_to_phys = NULL; - if (pvt->fam >= 0x17) - return; - /* memory scrubber interface */ mci->set_sdram_scrub_rate = set_scrub_rate; mci->get_sdram_scrub_rate = get_scrub_rate; } +static void umc_setup_mci_misc_attrs(struct mem_ctl_info *mci) +{ + struct amd64_pvt *pvt = mci->pvt_info; + + mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + + f17h_determine_edac_ctl_cap(mci, pvt); + + mci->edac_cap = determine_edac_cap(pvt); + mci->mod_name = EDAC_MOD_STR; + mci->ctl_name = pvt->ctl_name; + mci->dev_name = pci_name(pvt->F3); + mci->ctl_page_to_phys = NULL; +} + static int dct_hw_info_get(struct amd64_pvt *pvt) { int ret = reserve_mc_sibling_devs(pvt, pvt->f1_id, pvt->f2_id); @@ -3686,6 +3695,7 @@ static void hw_info_put(struct amd64_pvt *pvt) static struct low_ops umc_ops = { .hw_info_get = umc_hw_info_get, .ecc_enabled = umc_ecc_enabled, + .setup_mci_misc_attrs = umc_setup_mci_misc_attrs, }; /* Use Family 16h versions for defaults and adjust as needed below. */ @@ -3694,6 +3704,7 @@ static struct low_ops dct_ops = { .dbam_to_cs = f16_dbam_to_chip_select, .hw_info_get = dct_hw_info_get, .ecc_enabled = dct_ecc_enabled, + .setup_mci_misc_attrs = dct_setup_mci_misc_attrs, }; static int per_family_init(struct amd64_pvt *pvt) @@ -3856,7 +3867,7 @@ static int init_one_instance(struct amd64_pvt *pvt) mci->pvt_info = pvt; mci->pdev = &pvt->F3->dev; - setup_mci_misc_attrs(mci); + pvt->ops->setup_mci_misc_attrs(mci); if (init_csrows(mci)) mci->edac_cap = EDAC_FLAG_NONE; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 103cd38a6302..1fb39d7981a2 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -468,6 +468,7 @@ struct low_ops { unsigned int cs_mode, int cs_mask_nr); int (*hw_info_get)(struct amd64_pvt *pvt); bool (*ecc_enabled)(struct amd64_pvt *pvt); + void (*setup_mci_misc_attrs)(struct mem_ctl_info *mci); }; int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, From 9369239e8d8b3e91eaa61bca4a546ff7d67bc8bc Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Fri, 27 Jan 2023 17:04:15 +0000 Subject: [PATCH 32/48] EDAC/amd64: Rename f17h_determine_edac_ctl_cap() ...to match the "umc_" prefix convention. No functional change is intended. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-19-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 261a6f662f2a..d116d3ccb309 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3586,7 +3586,7 @@ static bool umc_ecc_enabled(struct amd64_pvt *pvt) } static inline void -f17h_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) +umc_determine_edac_ctl_cap(struct mem_ctl_info *mci, struct amd64_pvt *pvt) { u8 i, ecc_en = 1, cpk_en = 1, dev_x4 = 1, dev_x16 = 1; @@ -3647,7 +3647,7 @@ static void umc_setup_mci_misc_attrs(struct mem_ctl_info *mci) mci->mtype_cap = MEM_FLAG_DDR4 | MEM_FLAG_RDDR4; mci->edac_ctl_cap = EDAC_FLAG_NONE; - f17h_determine_edac_ctl_cap(mci, pvt); + umc_determine_edac_ctl_cap(mci, pvt); mci->edac_cap = determine_edac_cap(pvt); mci->mod_name = EDAC_MOD_STR; From f6a4b4a1aa1647a5a9c58aeef099a7a8593151fe Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Fri, 27 Jan 2023 17:04:16 +0000 Subject: [PATCH 33/48] EDAC/amd64: Split determine_edac_cap() into dct/umc functions Call them from their respective setup_mci_misc_attrs() paths. No functional change is intended. [ Yazen: rebased/reworked patch and reworded commit message. ] Signed-off-by: Muralidhara M K Co-developed-by: Naveen Krishna Chatradhi Signed-off-by: Naveen Krishna Chatradhi Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-20-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index d116d3ccb309..344afad5841c 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1256,13 +1256,25 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); * Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs * are ECC capable. */ -static unsigned long determine_edac_cap(struct amd64_pvt *pvt) +static unsigned long dct_determine_edac_cap(struct amd64_pvt *pvt) { unsigned long edac_cap = EDAC_FLAG_NONE; u8 bit; - if (pvt->umc) { - u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0; + bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F) + ? 19 + : 17; + + if (pvt->dclr0 & BIT(bit)) + edac_cap = EDAC_FLAG_SECDED; + + return edac_cap; +} + +static unsigned long umc_determine_edac_cap(struct amd64_pvt *pvt) +{ + u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0; + unsigned long edac_cap = EDAC_FLAG_NONE; for_each_umc(i) { if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT)) @@ -1277,14 +1289,6 @@ static unsigned long determine_edac_cap(struct amd64_pvt *pvt) if (umc_en_mask == dimm_ecc_en_mask) edac_cap = EDAC_FLAG_SECDED; - } else { - bit = (pvt->fam > 0xf || pvt->ext_model >= K8_REV_F) - ? 19 - : 17; - - if (pvt->dclr0 & BIT(bit)) - edac_cap = EDAC_FLAG_SECDED; - } return edac_cap; } @@ -3629,7 +3633,7 @@ static void dct_setup_mci_misc_attrs(struct mem_ctl_info *mci) if (pvt->nbcap & NBCAP_CHIPKILL) mci->edac_ctl_cap |= EDAC_FLAG_S4ECD4ED; - mci->edac_cap = determine_edac_cap(pvt); + mci->edac_cap = dct_determine_edac_cap(pvt); mci->mod_name = EDAC_MOD_STR; mci->ctl_name = pvt->ctl_name; mci->dev_name = pci_name(pvt->F3); @@ -3649,7 +3653,7 @@ static void umc_setup_mci_misc_attrs(struct mem_ctl_info *mci) umc_determine_edac_ctl_cap(mci, pvt); - mci->edac_cap = determine_edac_cap(pvt); + mci->edac_cap = umc_determine_edac_cap(pvt); mci->mod_name = EDAC_MOD_STR; mci->ctl_name = pvt->ctl_name; mci->dev_name = pci_name(pvt->F3); From 6fb8b5fb9e324c617092d61bbe193aa6a49ff2dc Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Fri, 27 Jan 2023 17:04:17 +0000 Subject: [PATCH 34/48] EDAC/amd64: Split init_csrows() into dct/umc functions Call them from their respective setup_mci_misc_attrs() paths. Also, drop the check for an "empty" device, i.e. one without memory. This is redundant and already done in instance_has_memory() earlier in the init path. No functional change is intended. [ Yazen: rebased/reworked patch and reworded commit message. ] Signed-off-by: Muralidhara M K Co-developed-by: Naveen Krishna Chatradhi Signed-off-by: Naveen Krishna Chatradhi Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-21-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 344afad5841c..0d96eaedc4a1 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3250,13 +3250,12 @@ static u32 umc_get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr_or return nr_pages; } -static int init_csrows_df(struct mem_ctl_info *mci) +static void umc_init_csrows(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; enum edac_type edac_mode = EDAC_NONE; enum dev_type dev_type = DEV_UNKNOWN; struct dimm_info *dimm; - int empty = 1; u8 umc, cs; if (mci->edac_ctl_cap & EDAC_FLAG_S16ECD16ED) { @@ -3277,7 +3276,6 @@ static int init_csrows_df(struct mem_ctl_info *mci) if (!csrow_enabled(cs, umc, pvt)) continue; - empty = 0; dimm = mci->csrows[cs]->channels[umc]->dimm; edac_dbg(1, "MC node: %d, csrow: %d\n", @@ -3290,27 +3288,22 @@ static int init_csrows_df(struct mem_ctl_info *mci) dimm->grain = 64; } } - - return empty; } /* * Initialize the array of csrow attribute instances, based on the values * from pci config hardware registers. */ -static int init_csrows(struct mem_ctl_info *mci) +static void dct_init_csrows(struct mem_ctl_info *mci) { struct amd64_pvt *pvt = mci->pvt_info; enum edac_type edac_mode = EDAC_NONE; struct csrow_info *csrow; struct dimm_info *dimm; - int i, j, empty = 1; int nr_pages = 0; + int i, j; u32 val; - if (pvt->umc) - return init_csrows_df(mci); - amd64_read_pci_cfg(pvt->F3, NBCFG, &val); pvt->nbcfg = val; @@ -3333,7 +3326,6 @@ static int init_csrows(struct mem_ctl_info *mci) continue; csrow = mci->csrows[i]; - empty = 0; edac_dbg(1, "MC node: %d, csrow: %d\n", pvt->mc_node_id, i); @@ -3367,8 +3359,6 @@ static int init_csrows(struct mem_ctl_info *mci) dimm->grain = 64; } } - - return empty; } /* get all cores on this DCT */ @@ -3642,6 +3632,8 @@ static void dct_setup_mci_misc_attrs(struct mem_ctl_info *mci) /* memory scrubber interface */ mci->set_sdram_scrub_rate = set_scrub_rate; mci->get_sdram_scrub_rate = get_scrub_rate; + + dct_init_csrows(mci); } static void umc_setup_mci_misc_attrs(struct mem_ctl_info *mci) @@ -3658,6 +3650,8 @@ static void umc_setup_mci_misc_attrs(struct mem_ctl_info *mci) mci->ctl_name = pvt->ctl_name; mci->dev_name = pci_name(pvt->F3); mci->ctl_page_to_phys = NULL; + + umc_init_csrows(mci); } static int dct_hw_info_get(struct amd64_pvt *pvt) @@ -3873,9 +3867,6 @@ static int init_one_instance(struct amd64_pvt *pvt) pvt->ops->setup_mci_misc_attrs(mci); - if (init_csrows(mci)) - mci->edac_cap = EDAC_FLAG_NONE; - ret = -ENODEV; if (edac_mc_add_mc_with_groups(mci, amd64_edac_attr_groups)) { edac_dbg(1, "failed edac_mc_add_mc()\n"); From f6f36382d6353f648ca111661e0584cd7eb5962a Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Fri, 27 Jan 2023 17:04:18 +0000 Subject: [PATCH 35/48] EDAC/amd64: Split dump_misc_regs() into dct/umc functions Add a function pointer to pvt->ops. No functional change is intended. [ Yazen: Rebased/reworked patch and reworded commit message. ] Signed-off-by: Muralidhara M K Co-developed-by: Naveen Krishna Chatradhi Signed-off-by: Naveen Krishna Chatradhi Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-22-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 19 ++++++------------- drivers/edac/amd64_edac.h | 1 + 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 0d96eaedc4a1..7446b4802a62 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1525,7 +1525,7 @@ static void umc_debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) } } -static void __dump_misc_regs_df(struct amd64_pvt *pvt) +static void umc_dump_misc_regs(struct amd64_pvt *pvt) { struct amd64_umc *umc; u32 i, tmp, umc_base; @@ -1568,8 +1568,7 @@ static void __dump_misc_regs_df(struct amd64_pvt *pvt) } } -/* Display and decode various NB registers for debug purposes. */ -static void __dump_misc_regs(struct amd64_pvt *pvt) +static void dct_dump_misc_regs(struct amd64_pvt *pvt) { edac_dbg(1, "F3xE8 (NB Cap): 0x%08x\n", pvt->nbcap); @@ -1606,15 +1605,6 @@ static void __dump_misc_regs(struct amd64_pvt *pvt) amd64_info("using x%u syndromes.\n", pvt->ecc_sym_sz); } -/* Display and decode various NB registers for debug purposes. */ -static void dump_misc_regs(struct amd64_pvt *pvt) -{ - if (pvt->umc) - __dump_misc_regs_df(pvt); - else - __dump_misc_regs(pvt); -} - /* * See BKDG, F2x[1,0][5C:40], F2[1,0][6C:60] */ @@ -3694,6 +3684,7 @@ static struct low_ops umc_ops = { .hw_info_get = umc_hw_info_get, .ecc_enabled = umc_ecc_enabled, .setup_mci_misc_attrs = umc_setup_mci_misc_attrs, + .dump_misc_regs = umc_dump_misc_regs, }; /* Use Family 16h versions for defaults and adjust as needed below. */ @@ -3703,6 +3694,7 @@ static struct low_ops dct_ops = { .hw_info_get = dct_hw_info_get, .ecc_enabled = dct_ecc_enabled, .setup_mci_misc_attrs = dct_setup_mci_misc_attrs, + .dump_misc_regs = dct_dump_misc_regs, }; static int per_family_init(struct amd64_pvt *pvt) @@ -3953,7 +3945,8 @@ static int probe_one_instance(unsigned int nid) amd64_info("%s detected (node %d).\n", pvt->ctl_name, pvt->mc_node_id); - dump_misc_regs(pvt); + /* Display and decode various registers for debug purposes. */ + pvt->ops->dump_misc_regs(pvt); return ret; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 1fb39d7981a2..1c64fd4a14b1 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -469,6 +469,7 @@ struct low_ops { int (*hw_info_get)(struct amd64_pvt *pvt); bool (*ecc_enabled)(struct amd64_pvt *pvt); void (*setup_mci_misc_attrs)(struct mem_ctl_info *mci); + void (*dump_misc_regs)(struct amd64_pvt *pvt); }; int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, From b3ece3a6a231e51c262d67fd47c017970a7dee44 Mon Sep 17 00:00:00 2001 From: Muralidhara M K Date: Fri, 27 Jan 2023 17:04:19 +0000 Subject: [PATCH 36/48] EDAC/amd64: Add get_err_info() to pvt->ops GPU Nodes will use a different method to determine the chip select and channel of an error. A function pointer should be used rather than introduce another branching condition. Prepare for this by adding get_err_info() to pvt->ops. This function is only called from the modern code path, so a legacy function is not defined. Make sure to call this after MCA_STATUS[SyndV] is checked, since the csrow value is found in MCA_SYND. [ Yazen: rebased/reworked patch and reworded commit message. ] Signed-off-by: Muralidhara M K Co-developed-by: Naveen Krishna Chatradhi Signed-off-by: Naveen Krishna Chatradhi Co-developed-by: Yazen Ghannam Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230127170419.1824692-23-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 13 ++++++++----- drivers/edac/amd64_edac.h | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 7446b4802a62..85b460b67838 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2974,10 +2974,14 @@ static inline void decode_bus_error(int node_id, struct mce *m) * Currently, we can derive the channel number by looking at the 6th nibble in * the instance_id. For example, instance_id=0xYXXXXX where Y is the channel * number. + * + * For DRAM ECC errors, the Chip Select number is given in bits [2:0] of + * the MCA_SYND[ErrorInformation] field. */ -static int find_umc_channel(struct mce *m) +static void umc_get_err_info(struct mce *m, struct err_info *err) { - return (m->ipid & GENMASK(31, 0)) >> 20; + err->channel = (m->ipid & GENMASK(31, 0)) >> 20; + err->csrow = m->synd & 0x7; } static void decode_umc_error(int node_id, struct mce *m) @@ -2999,8 +3003,6 @@ static void decode_umc_error(int node_id, struct mce *m) if (m->status & MCI_STATUS_DEFERRED) ecc_type = 3; - err.channel = find_umc_channel(m); - if (!(m->status & MCI_STATUS_SYNDV)) { err.err_code = ERR_SYND; goto log_error; @@ -3015,7 +3017,7 @@ static void decode_umc_error(int node_id, struct mce *m) err.err_code = ERR_CHANNEL; } - err.csrow = m->synd & 0x7; + pvt->ops->get_err_info(m, &err); if (umc_normaddr_to_sysaddr(m->addr, pvt->mc_node_id, err.channel, &sys_addr)) { err.err_code = ERR_NORM_ADDR; @@ -3685,6 +3687,7 @@ static struct low_ops umc_ops = { .ecc_enabled = umc_ecc_enabled, .setup_mci_misc_attrs = umc_setup_mci_misc_attrs, .dump_misc_regs = umc_dump_misc_regs, + .get_err_info = umc_get_err_info, }; /* Use Family 16h versions for defaults and adjust as needed below. */ diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 1c64fd4a14b1..e84fe0d4120a 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -470,6 +470,7 @@ struct low_ops { bool (*ecc_enabled)(struct amd64_pvt *pvt); void (*setup_mci_misc_attrs)(struct mem_ctl_info *mci); void (*dump_misc_regs)(struct amd64_pvt *pvt); + void (*get_err_info)(struct mce *m, struct err_info *err); }; int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, From 88e378d400fa0544d51cf62037e7774d8a4b4379 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 8 Mar 2023 18:40:02 -0800 Subject: [PATCH 37/48] x86/ioremap: Add hypervisor callback for private MMIO mapping in coco VM Current code always maps MMIO devices as shared (decrypted) in a confidential computing VM. But Hyper-V guest VMs on AMD SEV-SNP with vTOM use a paravisor running in VMPL0 to emulate some devices, such as the IO-APIC and TPM. In such a case, the device must be accessed as private (encrypted) because the paravisor emulates the device at an address below vTOM, where all accesses are encrypted. Add a new hypervisor callback to determine if an MMIO address should be mapped private. The callback allows hypervisor-specific code to handle any quirks, the use of a paravisor, etc. in determining whether a mapping must be private. If the callback is not used by a hypervisor, default to returning "false", which is consistent with normal coco VM behavior. Use this callback as another special case to check for when doing ioremap(). Just checking the starting address is sufficient as an ioremap range must be all private or all shared. Also make the callback in early boot IO-APIC mapping code that uses the fixmap. [ bp: Touchups. ] Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/1678329614-3482-2-git-send-email-mikelley@microsoft.com --- arch/x86/include/asm/x86_init.h | 4 ++++ arch/x86/kernel/apic/io_apic.c | 10 ++++++++-- arch/x86/kernel/x86_init.c | 2 ++ arch/x86/mm/ioremap.c | 5 +++++ 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index c1c8c581759d..acc20ae4079d 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -259,11 +259,15 @@ struct x86_legacy_features { * VMMCALL under SEV-ES. Needs to return 'false' * if the checks fail. Called from the #VC * exception handler. + * @is_private_mmio: For CoCo VMs, must map MMIO address as private. + * Used when device is emulated by a paravisor + * layer in the VM context. */ struct x86_hyper_runtime { void (*pin_vcpu)(int cpu); void (*sev_es_hcall_prepare)(struct ghcb *ghcb, struct pt_regs *regs); bool (*sev_es_hcall_finish)(struct ghcb *ghcb, struct pt_regs *regs); + bool (*is_private_mmio)(u64 addr); }; /** diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1f83b052bb74..146671de9ddc 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -66,6 +66,7 @@ #include #include #include +#include #define for_each_ioapic(idx) \ for ((idx) = 0; (idx) < nr_ioapics; (idx)++) @@ -2680,10 +2681,15 @@ static void io_apic_set_fixmap(enum fixed_addresses idx, phys_addr_t phys) pgprot_t flags = FIXMAP_PAGE_NOCACHE; /* - * Ensure fixmaps for IOAPIC MMIO respect memory encryption pgprot + * Ensure fixmaps for IO-APIC MMIO respect memory encryption pgprot * bits, just like normal ioremap(): */ - flags = pgprot_decrypted(flags); + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { + if (x86_platform.hyper.is_private_mmio(phys)) + flags = pgprot_encrypted(flags); + else + flags = pgprot_decrypted(flags); + } __set_fixmap(idx, phys, flags); } diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index ef80d361b463..95be3831df73 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -134,6 +134,7 @@ static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } static bool enc_tlb_flush_required_noop(bool enc) { return false; } static bool enc_cache_flush_required_noop(void) { return false; } +static bool is_private_mmio_noop(u64 addr) {return false; } struct x86_platform_ops x86_platform __ro_after_init = { .calibrate_cpu = native_calibrate_cpu_early, @@ -149,6 +150,7 @@ struct x86_platform_ops x86_platform __ro_after_init = { .realmode_reserve = reserve_real_mode, .realmode_init = init_real_mode, .hyper.pin_vcpu = x86_op_int_noop, + .hyper.is_private_mmio = is_private_mmio_noop, .guest = { .enc_status_change_prepare = enc_status_change_prepare_noop, diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 6453fbaedb08..aa7d279321ea 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -116,6 +116,11 @@ static void __ioremap_check_other(resource_size_t addr, struct ioremap_desc *des if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) return; + if (x86_platform.hyper.is_private_mmio(addr)) { + desc->flags |= IORES_MAP_ENCRYPTED; + return; + } + if (!IS_ENABLED(CONFIG_EFI)) return; From 71290be18f2deeae013482bf79cd526df61fcfcd Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:51:57 -0700 Subject: [PATCH 38/48] x86/hyperv: Reorder code to facilitate future work Reorder some code to facilitate future work. No functional change. Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tianyu Lan Link: https://lore.kernel.org/r/1679838727-87310-3-git-send-email-mikelley@microsoft.com --- arch/x86/hyperv/ivm.c | 68 +++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 1dbcbd9da74d..f33c67ef1b25 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -235,40 +235,6 @@ void hv_ghcb_msr_read(u64 msr, u64 *value) EXPORT_SYMBOL_GPL(hv_ghcb_msr_read); #endif -enum hv_isolation_type hv_get_isolation_type(void) -{ - if (!(ms_hyperv.priv_high & HV_ISOLATION)) - return HV_ISOLATION_TYPE_NONE; - return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); -} -EXPORT_SYMBOL_GPL(hv_get_isolation_type); - -/* - * hv_is_isolation_supported - Check system runs in the Hyper-V - * isolation VM. - */ -bool hv_is_isolation_supported(void) -{ - if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) - return false; - - if (!hypervisor_is_type(X86_HYPER_MS_HYPERV)) - return false; - - return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; -} - -DEFINE_STATIC_KEY_FALSE(isolation_type_snp); - -/* - * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based - * isolation VM. - */ -bool hv_isolation_type_snp(void) -{ - return static_branch_unlikely(&isolation_type_snp); -} - /* * hv_mark_gpa_visibility - Set pages visible to host via hvcall. * @@ -387,3 +353,37 @@ void hv_unmap_memory(void *addr) { vunmap(addr); } + +enum hv_isolation_type hv_get_isolation_type(void) +{ + if (!(ms_hyperv.priv_high & HV_ISOLATION)) + return HV_ISOLATION_TYPE_NONE; + return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b); +} +EXPORT_SYMBOL_GPL(hv_get_isolation_type); + +/* + * hv_is_isolation_supported - Check system runs in the Hyper-V + * isolation VM. + */ +bool hv_is_isolation_supported(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) + return false; + + if (!hypervisor_is_type(X86_HYPER_MS_HYPERV)) + return false; + + return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; +} + +DEFINE_STATIC_KEY_FALSE(isolation_type_snp); + +/* + * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based + * isolation VM. + */ +bool hv_isolation_type_snp(void) +{ + return static_branch_unlikely(&isolation_type_snp); +} From d33ddc92db8a61416473ff3d7f1c621c50733dc0 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:51:58 -0700 Subject: [PATCH 39/48] Drivers: hv: Explicitly request decrypted in vmap_pfn() calls Update vmap_pfn() calls to explicitly request that the mapping be for decrypted access to the memory. There's no change in functionality since the PFNs passed to vmap_pfn() are above the shared_gpa_boundary, implicitly producing a decrypted mapping. But explicitly requesting "decrypted" allows the code to work before and after changes that cause vmap_pfn() to mask the PFNs to being below the shared_gpa_boundary. Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tianyu Lan Link: https://lore.kernel.org/r/1679838727-87310-4-git-send-email-mikelley@microsoft.com --- arch/x86/hyperv/ivm.c | 2 +- drivers/hv/ring_buffer.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index f33c67ef1b25..5648efb6c73e 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -343,7 +343,7 @@ void *hv_map_memory(void *addr, unsigned long size) pfns[i] = vmalloc_to_pfn(addr + i * PAGE_SIZE) + (ms_hyperv.shared_gpa_boundary >> PAGE_SHIFT); - vaddr = vmap_pfn(pfns, size / PAGE_SIZE, PAGE_KERNEL_IO); + vaddr = vmap_pfn(pfns, size / PAGE_SIZE, pgprot_decrypted(PAGE_KERNEL)); kfree(pfns); return vaddr; diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index c6692fd5ab15..2111e97c3b63 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -211,7 +211,7 @@ int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info, ring_info->ring_buffer = (struct hv_ring_buffer *) vmap_pfn(pfns_wraparound, page_cnt * 2 - 1, - PAGE_KERNEL); + pgprot_decrypted(PAGE_KERNEL)); kfree(pfns_wraparound); if (!ring_info->ring_buffer) From c7b5254bd802ee3868f1c59333545272dc700d6d Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 8 Mar 2023 18:40:05 -0800 Subject: [PATCH 40/48] x86/mm: Handle decryption/re-encryption of bss_decrypted consistently sme_postprocess_startup() decrypts the bss_decrypted section when sme_me_mask is non-zero. mem_encrypt_free_decrypted_mem() re-encrypts the unused portion based on CC_ATTR_MEM_ENCRYPT. In a Hyper-V guest VM using vTOM, these conditions are not equivalent as sme_me_mask is always zero when using vTOM. Consequently, mem_encrypt_free_decrypted_mem() attempts to re-encrypt memory that was never decrypted. So check sme_me_mask in mem_encrypt_free_decrypted_mem() too. Hyper-V guests using vTOM don't need the bss_decrypted section to be decrypted, so skipping the decryption/re-encryption doesn't cause a problem. Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/1678329614-3482-5-git-send-email-mikelley@microsoft.com --- arch/x86/mm/mem_encrypt_amd.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 9c4d8dbcb129..e0b51c09109f 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -513,10 +513,14 @@ void __init mem_encrypt_free_decrypted_mem(void) npages = (vaddr_end - vaddr) >> PAGE_SHIFT; /* - * The unused memory range was mapped decrypted, change the encryption - * attribute from decrypted to encrypted before freeing it. + * If the unused memory range was mapped decrypted, change the encryption + * attribute from decrypted to encrypted before freeing it. Base the + * re-encryption on the same condition used for the decryption in + * sme_postprocess_startup(). Higher level abstractions, such as + * CC_ATTR_MEM_ENCRYPT, aren't necessarily equivalent in a Hyper-V VM + * using vTOM, where sme_me_mask is always zero. */ - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) { + if (sme_me_mask) { r = set_memory_encrypted(vaddr, npages); if (r) { pr_warn("failed to free unused decrypted pages\n"); From e45e761b77bc0739e7e23258c4394013bbb919c7 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Wed, 8 Mar 2023 18:40:06 -0800 Subject: [PATCH 41/48] init: Call mem_encrypt_init() after Hyper-V hypercall init is done Full Hyper-V initialization, including support for hypercalls, is done as an apic_post_init callback via late_time_init(). mem_encrypt_init() needs to make hypercalls when it marks swiotlb memory as decrypted. But mem_encrypt_init() is currently called a few lines before late_time_init(), so the hypercalls don't work. Fix this by moving mem_encrypt_init() after late_time_init() and related clock initializations. The intervening initializations don't do any I/O that requires the swiotlb, so moving mem_encrypt_init() slightly later has no impact. Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Tom Lendacky Link: https://lore.kernel.org/r/1678329614-3482-6-git-send-email-mikelley@microsoft.com --- init/main.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/init/main.c b/init/main.c index 4425d1783d5c..7e9c0ca25643 100644 --- a/init/main.c +++ b/init/main.c @@ -1088,14 +1088,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) */ locking_selftest(); - /* - * This needs to be called before any devices perform DMA - * operations that might use the SWIOTLB bounce buffers. It will - * mark the bounce buffers as decrypted so that their usage will - * not cause "plain-text" data to be decrypted when accessed. - */ - mem_encrypt_init(); - #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { @@ -1112,6 +1104,17 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) late_time_init(); sched_clock_init(); calibrate_delay(); + + /* + * This needs to be called before any devices perform DMA + * operations that might use the SWIOTLB bounce buffers. It will + * mark the bounce buffers as decrypted so that their usage will + * not cause "plain-text" data to be decrypted when accessed. It + * must be called after late_time_init() so that Hyper-V x86/x64 + * hypercalls work when the SWIOTLB bounce buffers are decrypted. + */ + mem_encrypt_init(); + pid_idr_init(); anon_vma_init(); #ifdef CONFIG_X86 From 812b0597fb4043240724e4c7bed7ba1fe15c0e3f Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 26 Mar 2023 06:52:01 -0700 Subject: [PATCH 42/48] x86/hyperv: Change vTOM handling to use standard coco mechanisms Hyper-V guests on AMD SEV-SNP hardware have the option of using the "virtual Top Of Memory" (vTOM) feature specified by the SEV-SNP architecture. With vTOM, shared vs. private memory accesses are controlled by splitting the guest physical address space into two halves. vTOM is the dividing line where the uppermost bit of the physical address space is set; e.g., with 47 bits of guest physical address space, vTOM is 0x400000000000 (bit 46 is set). Guest physical memory is accessible at two parallel physical addresses -- one below vTOM and one above vTOM. Accesses below vTOM are private (encrypted) while accesses above vTOM are shared (decrypted). In this sense, vTOM is like the GPA.SHARED bit in Intel TDX. Support for Hyper-V guests using vTOM was added to the Linux kernel in two patch sets[1][2]. This support treats the vTOM bit as part of the physical address. For accessing shared (decrypted) memory, these patch sets create a second kernel virtual mapping that maps to physical addresses above vTOM. A better approach is to treat the vTOM bit as a protection flag, not as part of the physical address. This new approach is like the approach for the GPA.SHARED bit in Intel TDX. Rather than creating a second kernel virtual mapping, the existing mapping is updated using recently added coco mechanisms. When memory is changed between private and shared using set_memory_decrypted() and set_memory_encrypted(), the PTEs for the existing kernel mapping are changed to add or remove the vTOM bit in the guest physical address, just as with TDX. The hypercalls to change the memory status on the host side are made using the existing callback mechanism. Everything just works, with a minor tweak to map the IO-APIC to use private accesses. To accomplish the switch in approach, the following must be done: * Update Hyper-V initialization to set the cc_mask based on vTOM and do other coco initialization. * Update physical_mask so the vTOM bit is no longer treated as part of the physical address * Remove CC_VENDOR_HYPERV and merge the associated vTOM functionality under CC_VENDOR_AMD. Update cc_mkenc() and cc_mkdec() to set/clear the vTOM bit as a protection flag. * Code already exists to make hypercalls to inform Hyper-V about pages changing between shared and private. Update this code to run as a callback from __set_memory_enc_pgtable(). * Remove the Hyper-V special case from __set_memory_enc_dec() * Remove the Hyper-V specific call to swiotlb_update_mem_attributes() since mem_encrypt_init() will now do it. * Add a Hyper-V specific implementation of the is_private_mmio() callback that returns true for the IO-APIC and vTPM MMIO addresses [1] https://lore.kernel.org/all/20211025122116.264793-1-ltykernel@gmail.com/ [2] https://lore.kernel.org/all/20211213071407.314309-1-ltykernel@gmail.com/ [ bp: Touchups. ] Signed-off-by: Michael Kelley Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/1679838727-87310-7-git-send-email-mikelley@microsoft.com --- arch/x86/coco/core.c | 40 ++++++++++++----- arch/x86/hyperv/hv_init.c | 11 ----- arch/x86/hyperv/ivm.c | 72 +++++++++++++++++++++++++----- arch/x86/include/asm/coco.h | 1 - arch/x86/include/asm/mem_encrypt.h | 1 + arch/x86/include/asm/mshyperv.h | 16 ++++--- arch/x86/kernel/cpu/mshyperv.c | 15 +++---- arch/x86/mm/pat/set_memory.c | 3 -- drivers/hv/vmbus_drv.c | 1 - include/asm-generic/mshyperv.h | 2 + 10 files changed, 111 insertions(+), 51 deletions(-) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index 49b44f881484..f4f0625691fd 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -29,6 +29,22 @@ static bool intel_cc_platform_has(enum cc_attr attr) } } +/* + * Handle the SEV-SNP vTOM case where sme_me_mask is zero, and + * the other levels of SME/SEV functionality, including C-bit + * based SEV-SNP, are not enabled. + */ +static __maybe_unused bool amd_cc_platform_vtom(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_GUEST_MEM_ENCRYPT: + case CC_ATTR_MEM_ENCRYPT: + return true; + default: + return false; + } +} + /* * SME and SEV are very similar but they are not the same, so there are * times that the kernel will need to distinguish between SME and SEV. The @@ -41,9 +57,14 @@ static bool intel_cc_platform_has(enum cc_attr attr) * up under SME the trampoline area cannot be encrypted, whereas under SEV * the trampoline area must be encrypted. */ + static bool amd_cc_platform_has(enum cc_attr attr) { #ifdef CONFIG_AMD_MEM_ENCRYPT + + if (sev_status & MSR_AMD64_SNP_VTOM) + return amd_cc_platform_vtom(attr); + switch (attr) { case CC_ATTR_MEM_ENCRYPT: return sme_me_mask; @@ -76,11 +97,6 @@ static bool amd_cc_platform_has(enum cc_attr attr) #endif } -static bool hyperv_cc_platform_has(enum cc_attr attr) -{ - return attr == CC_ATTR_GUEST_MEM_ENCRYPT; -} - bool cc_platform_has(enum cc_attr attr) { switch (vendor) { @@ -88,8 +104,6 @@ bool cc_platform_has(enum cc_attr attr) return amd_cc_platform_has(attr); case CC_VENDOR_INTEL: return intel_cc_platform_has(attr); - case CC_VENDOR_HYPERV: - return hyperv_cc_platform_has(attr); default: return false; } @@ -103,11 +117,14 @@ u64 cc_mkenc(u64 val) * encryption status of the page. * * - for AMD, bit *set* means the page is encrypted - * - for Intel *clear* means encrypted. + * - for AMD with vTOM and for Intel, *clear* means encrypted */ switch (vendor) { case CC_VENDOR_AMD: - return val | cc_mask; + if (sev_status & MSR_AMD64_SNP_VTOM) + return val & ~cc_mask; + else + return val | cc_mask; case CC_VENDOR_INTEL: return val & ~cc_mask; default: @@ -120,7 +137,10 @@ u64 cc_mkdec(u64 val) /* See comment in cc_mkenc() */ switch (vendor) { case CC_VENDOR_AMD: - return val & ~cc_mask; + if (sev_status & MSR_AMD64_SNP_VTOM) + return val | cc_mask; + else + return val & ~cc_mask; case CC_VENDOR_INTEL: return val | cc_mask; default: diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 41ef036ebb7b..edbc67ec1f3e 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -29,7 +29,6 @@ #include #include #include -#include int hyperv_init_cpuhp; u64 hv_current_partition_id = ~0ull; @@ -504,16 +503,6 @@ void __init hyperv_init(void) /* Query the VMs extended capability once, so that it can be cached. */ hv_query_ext_cap(0); -#ifdef CONFIG_SWIOTLB - /* - * Swiotlb bounce buffer needs to be mapped in extra address - * space. Map function doesn't work in the early place and so - * call swiotlb_update_mem_attributes() here. - */ - if (hv_is_isolation_supported()) - swiotlb_update_mem_attributes(); -#endif - return; clean_guest_os_id: diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 5648efb6c73e..f6a020cb1a24 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include #include @@ -233,7 +235,6 @@ void hv_ghcb_msr_read(u64 msr, u64 *value) local_irq_restore(flags); } EXPORT_SYMBOL_GPL(hv_ghcb_msr_read); -#endif /* * hv_mark_gpa_visibility - Set pages visible to host via hvcall. @@ -286,27 +287,25 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], } /* - * hv_set_mem_host_visibility - Set specified memory visible to host. + * hv_vtom_set_host_visibility - Set specified memory visible to host. * * In Isolation VM, all guest memory is encrypted from host and guest * needs to set memory visible to host via hvcall before sharing memory * with host. This function works as wrap of hv_mark_gpa_visibility() * with memory base and size. */ -int hv_set_mem_host_visibility(unsigned long kbuffer, int pagecount, bool visible) +static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc) { - enum hv_mem_host_visibility visibility = visible ? - VMBUS_PAGE_VISIBLE_READ_WRITE : VMBUS_PAGE_NOT_VISIBLE; + enum hv_mem_host_visibility visibility = enc ? + VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE; u64 *pfn_array; int ret = 0; + bool result = true; int i, pfn; - if (!hv_is_isolation_supported() || !hv_hypercall_pg) - return 0; - pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL); if (!pfn_array) - return -ENOMEM; + return false; for (i = 0, pfn = 0; i < pagecount; i++) { pfn_array[pfn] = virt_to_hvpfn((void *)kbuffer + i * HV_HYP_PAGE_SIZE); @@ -315,17 +314,68 @@ int hv_set_mem_host_visibility(unsigned long kbuffer, int pagecount, bool visibl if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) { ret = hv_mark_gpa_visibility(pfn, pfn_array, visibility); - if (ret) + if (ret) { + result = false; goto err_free_pfn_array; + } pfn = 0; } } err_free_pfn_array: kfree(pfn_array); - return ret; + return result; } +static bool hv_vtom_tlb_flush_required(bool private) +{ + return true; +} + +static bool hv_vtom_cache_flush_required(void) +{ + return false; +} + +static bool hv_is_private_mmio(u64 addr) +{ + /* + * Hyper-V always provides a single IO-APIC in a guest VM. + * When a paravisor is used, it is emulated by the paravisor + * in the guest context and must be mapped private. + */ + if (addr >= HV_IOAPIC_BASE_ADDRESS && + addr < (HV_IOAPIC_BASE_ADDRESS + PAGE_SIZE)) + return true; + + /* Same with a vTPM */ + if (addr >= VTPM_BASE_ADDRESS && + addr < (VTPM_BASE_ADDRESS + PAGE_SIZE)) + return true; + + return false; +} + +void __init hv_vtom_init(void) +{ + /* + * By design, a VM using vTOM doesn't see the SEV setting, + * so SEV initialization is bypassed and sev_status isn't set. + * Set it here to indicate a vTOM VM. + */ + sev_status = MSR_AMD64_SNP_VTOM; + cc_set_vendor(CC_VENDOR_AMD); + cc_set_mask(ms_hyperv.shared_gpa_boundary); + physical_mask &= ms_hyperv.shared_gpa_boundary - 1; + + x86_platform.hyper.is_private_mmio = hv_is_private_mmio; + x86_platform.guest.enc_cache_flush_required = hv_vtom_cache_flush_required; + x86_platform.guest.enc_tlb_flush_required = hv_vtom_tlb_flush_required; + x86_platform.guest.enc_status_change_finish = hv_vtom_set_host_visibility; +} + +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + /* * hv_map_memory - map memory to extra space in the AMD SEV-SNP Isolation VM. */ diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index 3d98c3a60d34..d2c6a2e8d04d 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -7,7 +7,6 @@ enum cc_vendor { CC_VENDOR_NONE, CC_VENDOR_AMD, - CC_VENDOR_HYPERV, CC_VENDOR_INTEL, }; diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 72ca90552b6a..b7126701574c 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -56,6 +56,7 @@ void __init sev_es_init_vc_handling(void); #else /* !CONFIG_AMD_MEM_ENCRYPT */ #define sme_me_mask 0ULL +#define sev_status 0ULL static inline void __init sme_early_encrypt(resource_size_t paddr, unsigned long size) { } diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 4c4c0ec3b62e..e3cef98a0142 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -11,6 +11,14 @@ #include #include +/* + * Hyper-V always provides a single IO-APIC at this MMIO address. + * Ideally, the value should be looked up in ACPI tables, but it + * is needed for mapping the IO-APIC early in boot on Confidential + * VMs, before ACPI functions can be used. + */ +#define HV_IOAPIC_BASE_ADDRESS 0xfec00000 + union hv_ghcb; DECLARE_STATIC_KEY_FALSE(isolation_type_snp); @@ -206,18 +214,19 @@ struct irq_domain *hv_create_pci_msi_domain(void); int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector, struct hv_interrupt_entry *entry); int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry); -int hv_set_mem_host_visibility(unsigned long addr, int numpages, bool visible); #ifdef CONFIG_AMD_MEM_ENCRYPT void hv_ghcb_msr_write(u64 msr, u64 value); void hv_ghcb_msr_read(u64 msr, u64 *value); bool hv_ghcb_negotiate_protocol(void); void hv_ghcb_terminate(unsigned int set, unsigned int reason); +void hv_vtom_init(void); #else static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} +static inline void hv_vtom_init(void) {} #endif extern bool hv_isolation_type_snp(void); @@ -259,11 +268,6 @@ static inline void hv_set_register(unsigned int reg, u64 value) { } static inline u64 hv_get_register(unsigned int reg) { return 0; } static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { } static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; } -static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages, - bool visible) -{ - return -1; -} #endif /* CONFIG_HYPERV */ diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index f36dc2f796c5..ded7506217f2 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -33,7 +33,6 @@ #include #include #include -#include /* Is Linux running as the root partition? */ bool hv_root_partition; @@ -397,8 +396,10 @@ static void __init ms_hyperv_init_platform(void) if (ms_hyperv.priv_high & HV_ISOLATION) { ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG); ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG); - ms_hyperv.shared_gpa_boundary = - BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); + + if (ms_hyperv.shared_gpa_boundary_active) + ms_hyperv.shared_gpa_boundary = + BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); @@ -409,11 +410,6 @@ static void __init ms_hyperv_init_platform(void) swiotlb_unencrypted_base = ms_hyperv.shared_gpa_boundary; #endif } - /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */ - if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { - if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE) - cc_set_vendor(CC_VENDOR_HYPERV); - } } if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) { @@ -482,6 +478,9 @@ static void __init ms_hyperv_init_platform(void) i8253_clear_counter_on_shutdown = false; #if IS_ENABLED(CONFIG_HYPERV) + if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) || + (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)) + hv_vtom_init(); /* * Setup the hook to get control post apic initialization. */ diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 356758b7d4b4..b037954e0f61 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -2175,9 +2175,6 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) { - if (hv_is_isolation_supported()) - return hv_set_mem_host_visibility(addr, numpages, !enc); - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) return __set_memory_enc_pgtable(addr, numpages, enc); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index d24dd65b33d4..e9e1c4139e0d 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2156,7 +2156,6 @@ void vmbus_device_unregister(struct hv_device *device_obj) * VMBUS is an acpi enumerated device. Get the information we * need from DSDT. */ -#define VTPM_BASE_ADDRESS 0xfed40000 static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) { resource_size_t start = 0; diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 8845a2eca339..90d7f68ed39d 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -26,6 +26,8 @@ #include #include +#define VTPM_BASE_ADDRESS 0xfed40000 + struct ms_hyperv_info { u32 features; u32 priv_high; From 01db1030f16535fa8f9a6bb1d6af9cda06977adb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= Date: Sun, 29 Jan 2023 17:50:54 +0100 Subject: [PATCH 43/48] EDAC/amd81[13]1: Remove trailing newline from MODULE_AUTHOR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MODULE_AUTHOR strings don't usually include a newline character. Signed-off-by: Jonathan Neuschäfer Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230129165054.1675554-1-j.neuschaefer@gmx.net --- drivers/edac/amd8111_edac.c | 2 +- drivers/edac/amd8131_edac.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c index 7508aa416ddb..ca718f63fcbc 100644 --- a/drivers/edac/amd8111_edac.c +++ b/drivers/edac/amd8111_edac.c @@ -593,5 +593,5 @@ module_init(amd8111_edac_init); module_exit(amd8111_edac_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Cao Qingtao \n"); +MODULE_AUTHOR("Cao Qingtao "); MODULE_DESCRIPTION("AMD8111 HyperTransport I/O Hub EDAC kernel module"); diff --git a/drivers/edac/amd8131_edac.c b/drivers/edac/amd8131_edac.c index 169353710982..28610ba514f4 100644 --- a/drivers/edac/amd8131_edac.c +++ b/drivers/edac/amd8131_edac.c @@ -354,5 +354,5 @@ module_init(amd8131_edac_init); module_exit(amd8131_edac_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Cao Qingtao \n"); +MODULE_AUTHOR("Cao Qingtao "); MODULE_DESCRIPTION("AMD8131 HyperTransport PCI-X Tunnel EDAC kernel module"); From 371b27f2f3b03588bcf472763f541050e18f21a3 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Tue, 28 Mar 2023 15:39:43 +0200 Subject: [PATCH 44/48] EDAC: Sanitize MODULE_AUTHOR strings Fixup the remaining MODULE_AUTHOR strings to not contain newlines. Shorten and unbreak others. No functional changes. Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230328134309.23159-1-bp@alien8.de --- drivers/edac/amd64_edac.c | 6 ++---- drivers/edac/e752x_edac.c | 2 +- drivers/edac/e7xxx_edac.c | 3 +-- drivers/edac/i5000_edac.c | 7 ++----- drivers/edac/i5100_edac.c | 3 +-- drivers/edac/i82860_edac.c | 3 +-- drivers/edac/layerscape_edac.c | 3 +-- drivers/edac/mpc85xx_edac.c | 3 +-- drivers/edac/r82600_edac.c | 3 +-- 9 files changed, 11 insertions(+), 22 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 5b42533f306a..8b16ebf5fe12 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -4244,10 +4244,8 @@ module_init(amd64_edac_init); module_exit(amd64_edac_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, " - "Dave Peterson, Thayne Harbaugh"); -MODULE_DESCRIPTION("MC support for AMD64 memory controllers - " - EDAC_AMD64_VERSION); +MODULE_AUTHOR("SoftwareBitMaker: Doug Thompson, Dave Peterson, Thayne Harbaugh; AMD"); +MODULE_DESCRIPTION("MC support for AMD64 memory controllers - " EDAC_AMD64_VERSION); module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c index ac7c9b42d4c7..7221b4bb6df2 100644 --- a/drivers/edac/e752x_edac.c +++ b/drivers/edac/e752x_edac.c @@ -1462,7 +1462,7 @@ module_init(e752x_init); module_exit(e752x_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Linux Networx (http://lnxi.com) Tom Zimmerman\n"); +MODULE_AUTHOR("Linux Networx (http://lnxi.com) Tom Zimmerman"); MODULE_DESCRIPTION("MC support for Intel e752x/3100 memory controllers"); module_param(force_function_unhide, int, 0444); diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 497e710fca3d..5852b95fa470 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -596,8 +596,7 @@ module_init(e7xxx_init); module_exit(e7xxx_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al\n" - "Based on.work by Dan Hollis et al"); +MODULE_AUTHOR("Linux Networx (http://lnxi.com) Thayne Harbaugh et al"); MODULE_DESCRIPTION("MC support for Intel e7xxx memory controllers"); module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); diff --git a/drivers/edac/i5000_edac.c b/drivers/edac/i5000_edac.c index ba46057d4220..4b5a71f8739d 100644 --- a/drivers/edac/i5000_edac.c +++ b/drivers/edac/i5000_edac.c @@ -1573,13 +1573,10 @@ module_init(i5000_init); module_exit(i5000_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR - ("Linux Networx (http://lnxi.com) Doug Thompson "); -MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - " - I5000_REVISION); +MODULE_AUTHOR("Linux Networx (http://lnxi.com) Doug Thompson "); +MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - " I5000_REVISION); module_param(edac_op_state, int, 0444); MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); module_param(misc_messages, int, 0444); MODULE_PARM_DESC(misc_messages, "Log miscellaneous non fatal messages"); - diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index 8db680b6ae9b..d470afe65001 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -1220,6 +1220,5 @@ module_init(i5100_init); module_exit(i5100_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR - ("Arthur Jones "); +MODULE_AUTHOR("Arthur Jones "); MODULE_DESCRIPTION("MC Driver for Intel I5100 memory controllers"); diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index fbec90d00f1e..b8a497f0de28 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -355,8 +355,7 @@ module_init(i82860_init); module_exit(i82860_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) " - "Ben Woodard "); +MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com) Ben Woodard "); MODULE_DESCRIPTION("ECC support for Intel 82860 memory hub controllers"); module_param(edac_op_state, int, 0444); diff --git a/drivers/edac/layerscape_edac.c b/drivers/edac/layerscape_edac.c index 35ceaca578e1..7c5e2b3c0daa 100644 --- a/drivers/edac/layerscape_edac.c +++ b/drivers/edac/layerscape_edac.c @@ -72,5 +72,4 @@ module_exit(fsl_ddr_mc_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("NXP Semiconductor"); module_param(edac_op_state, int, 0444); -MODULE_PARM_DESC(edac_op_state, - "EDAC Error Reporting state: 0=Poll, 2=Interrupt"); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll, 2=Interrupt"); diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index e50d7928bf8f..55320546c174 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -711,5 +711,4 @@ module_exit(mpc85xx_mc_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Montavista Software, Inc."); module_param(edac_op_state, int, 0444); -MODULE_PARM_DESC(edac_op_state, - "EDAC Error Reporting state: 0=Poll, 2=Interrupt"); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll, 2=Interrupt"); diff --git a/drivers/edac/r82600_edac.c b/drivers/edac/r82600_edac.c index d0aef83dca2a..61e979d5437a 100644 --- a/drivers/edac/r82600_edac.c +++ b/drivers/edac/r82600_edac.c @@ -415,8 +415,7 @@ module_init(r82600_init); module_exit(r82600_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Tim Small - WPAD Ltd. " - "on behalf of EADS Astrium"); +MODULE_AUTHOR("Tim Small - WPAD Ltd. on behalf of EADS Astrium"); MODULE_DESCRIPTION("MC support for Radisys 82600 memory controllers"); module_param(disable_hardware_scrub, bool, 0644); From e088d80e2a1e72646c4be64edcbd58ac71b96251 Mon Sep 17 00:00:00 2001 From: Nick Alcock Date: Fri, 17 Feb 2023 14:10:58 +0000 Subject: [PATCH 45/48] EDAC/altera: Remove MODULE_LICENSE in non-module Since 8b41fc4454e3 ("kbuild: create modules.builtin without Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations are used to identify modules. As a consequence, uses of the macro in non-modules will cause modprobe to misidentify their containing object file as a module when it is not (false positives), and modprobe might succeed rather than failing with a suitable error message. altera_edac is not a module for a while now, remove the macro call. Suggested-by: Luis Chamberlain Signed-off-by: Nick Alcock Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230217141059.392471-24-nick.alcock@oracle.com --- drivers/edac/altera_edac.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 72fa49c44360..8b31cd54bdb6 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -2223,6 +2223,5 @@ static struct platform_driver altr_edac_a10_driver = { }; module_platform_driver(altr_edac_a10_driver); -MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Thor Thayer"); MODULE_DESCRIPTION("EDAC Driver for Altera Memories"); From 49aba1c5896b745a093318914f3fd128f5c9191e Mon Sep 17 00:00:00 2001 From: Yang Li Date: Tue, 4 Apr 2023 10:25:57 +0800 Subject: [PATCH 46/48] EDAC/amd64: Fix indentation in umc_determine_edac_cap() Use consistent indentation to improve the readability and fix: drivers/edac/amd64_edac.c:1279 umc_determine_edac_cap() warn: inconsistent indenting Fixes: f6a4b4a1aa16 ("EDAC/amd64: Split determine_edac_cap() into dct/umc functions") Reported-by: Abaci Robot Signed-off-by: Yang Li Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230404022557.46409-1-yang.lee@linux.alibaba.com --- drivers/edac/amd64_edac.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 85b460b67838..b55129425c81 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1276,19 +1276,19 @@ static unsigned long umc_determine_edac_cap(struct amd64_pvt *pvt) u8 i, umc_en_mask = 0, dimm_ecc_en_mask = 0; unsigned long edac_cap = EDAC_FLAG_NONE; - for_each_umc(i) { - if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT)) - continue; + for_each_umc(i) { + if (!(pvt->umc[i].sdp_ctrl & UMC_SDP_INIT)) + continue; - umc_en_mask |= BIT(i); + umc_en_mask |= BIT(i); - /* UMC Configuration bit 12 (DimmEccEn) */ - if (pvt->umc[i].umc_cfg & BIT(12)) - dimm_ecc_en_mask |= BIT(i); - } + /* UMC Configuration bit 12 (DimmEccEn) */ + if (pvt->umc[i].umc_cfg & BIT(12)) + dimm_ecc_en_mask |= BIT(i); + } - if (umc_en_mask == dimm_ecc_en_mask) - edac_cap = EDAC_FLAG_SECDED; + if (umc_en_mask == dimm_ecc_en_mask) + edac_cap = EDAC_FLAG_SECDED; return edac_cap; } From 36168bc061b4368ad19e82b06a6463c95d3bb9a7 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 4 Apr 2023 14:21:24 -0700 Subject: [PATCH 47/48] x86/cpu: Add Xeon Emerald Rapids to list of CPUs that support PPIN This should be the last addition to this table. Future CPUs will enumerate PPIN support using CPUID. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20230404212124.428118-1-tony.luck@intel.com --- arch/x86/kernel/cpu/common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8cd4126d8253..80710a68ef7d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -121,6 +121,7 @@ static const struct x86_cpu_id ppin_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]), X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]), From 96ae3995c6930d2b6c11a48dad9a59ddd34abaad Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Mon, 10 Apr 2023 21:15:31 +0800 Subject: [PATCH 48/48] EDAC/i10nm: Add Intel Sierra Forest server support The Sierra Forest CPU model uses similar memory controller registers as Granite Rapids server. Add Sierra Forest CPU model ID for EDAC support. Tested-by: Li Zhang Signed-off-by: Qiuxu Zhuo Signed-off-by: Tony Luck Link: https://lore.kernel.org/r/20230410131531.11914-1-qiuxu.zhuo@intel.com --- drivers/edac/i10nm_base.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 0a4691792801..a897b6aff368 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -906,6 +906,7 @@ static const struct x86_cpu_id i10nm_cpuids[] = { X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X, X86_STEPPINGS(0x0, 0xf), &spr_cfg), X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SIERRAFOREST_X, X86_STEPPINGS(0x0, 0xf), &gnr_cfg), {} }; MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);