diff --git a/arch/arm64/kvm/hyp/include/nvhe/trace/trace.h b/arch/arm64/kvm/hyp/include/nvhe/trace/trace.h index 27127cf76725..a4774f86116c 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/trace/trace.h +++ b/arch/arm64/kvm/hyp/include/nvhe/trace/trace.h @@ -77,7 +77,19 @@ do { \ #define trace_hyp_printk(fmt, ...) \ __trace_hyp_printk_N(fmt, __VA_ARGS__) + +#ifdef CONFIG_PROTECTED_NVHE_FTRACE +void hyp_ftrace_setup_core(void); +int hyp_ftrace_setup(unsigned long *funcs, unsigned long *funcs_end, + unsigned long hyp_kern_offset, void *tramp); +void hyp_ftrace_ret_flush(void); #else +static inline void hyp_ftrace_setup_core(void) { } +static inline void hyp_ftrace_ret_flush(void) { } +static inline int hyp_ftrace_setup(unsigned long *funcs, unsigned long *funcs_end, + unsigned long hyp_kern_offset, void *tramp) { return 0; } +#endif /* CONFIG_PROTECTED_NVHE_FTRACE */ +#else /* CONFIG_TRACING */ static inline void *tracing_reserve_entry(unsigned long length) { return NULL; } static inline void tracing_commit_entry(void) { } static inline int register_hyp_event_ids(void *event_ids, size_t nr_events) @@ -97,5 +109,8 @@ static inline int __pkvm_reset_tracing(unsigned int cpu) { return -ENODEV; } static inline int __pkvm_swap_reader_tracing(unsigned int cpu) { return -ENODEV; } static inline int __pkvm_enable_event(unsigned short id, bool enable) { return -ENODEV; } #define trace_hyp_printk(fmt, ...) 
+
+/* No-op stubs for the #else branch of the enclosing conditional. */
+static inline void hyp_ftrace_setup_core(void) { }
+static inline void hyp_ftrace_ret_flush(void) { }
 #endif
 #endif
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index bd1fb2453999..ffd00fb07451 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -13,6 +13,7 @@ hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
 	 ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
 hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o
 hyp-obj-$(CONFIG_TRACING) += clock.o events.o trace.o
+hyp-obj-$(CONFIG_PROTECTED_NVHE_FTRACE) += ftrace.o
 hyp-obj-$(CONFIG_MODULES) += modules.o
 hyp-obj-y += $(lib-objs)
 
diff --git a/arch/arm64/kvm/hyp/nvhe/ftrace.c b/arch/arm64/kvm/hyp/nvhe/ftrace.c
new file mode 100644
index 000000000000..992b5a30e404
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/ftrace.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (C) 2025 Google LLC
+ * Author: Vincent Donnefort
+ */
+
+#include
+
+#include
+#include
+
+#include
+
+/* Number of slots in hyp_kern_offsets[] (one per hyp_ftrace_setup() call). */
+#define HYP_FTRACE_MAX_OFFSETS	17 /* MAX_MOD_EVENTS + 1 */
+/* Number of frames each per-CPU hyp_ftrace_stack can hold. */
+#define HYP_FTRACE_MAX_DEPTH	32
+
+extern unsigned long hyp_nr_cpus;
+
+extern void __hyp_ftrace_tramp(void);
+extern void __hyp_ftrace_ret_tramp(void);
+
+/*
+ * Offsets registered by hyp_ftrace_setup(). A slot holding 0 is treated as
+ * free by the allocation loop there.
+ */
+static unsigned long hyp_kern_offsets[HYP_FTRACE_MAX_OFFSETS];
+
+/*
+ * Return @addr shifted by the offset stored in slot @offset_idx.
+ * @offset_idx is not range-checked here.
+ * NOTE(review): presumably converts a hypervisor VA into the matching kernel
+ * VA -- confirm against the offset computed by the callers of
+ * hyp_ftrace_setup().
+ */
+static unsigned long __kern_addr(unsigned long offset_idx, unsigned long addr)
+{
+	return addr + hyp_kern_offsets[offset_idx];
+}
+
+/*
+ * One saved call: @func is the value passed as 'func' to
+ * hyp_ftrace_func_push(), @ret is the original return address handed back by
+ * __hyp_ftrace_ret_trace().
+ */
+struct hyp_ftrace_stack_frame {
+	unsigned long func;
+	unsigned long ret;
+};
+
+/*
+ * Per-CPU stack of saved frames. @idx is the index of the top frame in
+ * @frames; -1 means the stack is empty.
+ */
+struct hyp_ftrace_stack {
+	int idx;
+	struct hyp_ftrace_stack_frame frames[HYP_FTRACE_MAX_DEPTH];
+};
+
+static DEFINE_PER_CPU(struct hyp_ftrace_stack, __ftrace_saved_frames);
+
+/* Mark the saved-frame stack of every CPU in [0, hyp_nr_cpus) as empty. */
+static void hyp_ftrace_func_reset(void)
+{
+	unsigned int cpu;
+
+	for (cpu = 0; cpu < hyp_nr_cpus; cpu++) {
+		struct hyp_ftrace_stack *stack;
+
+		stack = per_cpu_ptr(&__ftrace_saved_frames, cpu);
+		stack->idx = -1;
+	}
+
+	/*
+	 * Make sure the stack init is 
observed by all CPUs before patching the
+	 * code. Paired with smp_load_acquire() in hyp_ftrace_func_push().
+	 */
+	smp_mb();
+}
+
+/*
+ * Save (@func, @ret) on top of this CPU's frame stack.
+ *
+ * Returns true when the frame was stored, false when the stack already holds
+ * HYP_FTRACE_MAX_DEPTH frames (in which case nothing is modified).
+ */
+static __always_inline bool hyp_ftrace_func_push(unsigned long func, unsigned long ret)
+{
+	struct hyp_ftrace_stack *stack = this_cpu_ptr(&__ftrace_saved_frames);
+	int idx = smp_load_acquire(&stack->idx);
+
+	/* idx is the current top; the last usable slot is MAX_DEPTH - 1 */
+	if (idx >= (HYP_FTRACE_MAX_DEPTH - 1))
+		return false;
+
+	/* Fill the new frame first, publish the new top index last */
+	idx++;
+	stack->frames[idx].func = func;
+	stack->frames[idx].ret = ret;
+	stack->idx = idx;
+
+	return true;
+}
+
+/*
+ * Remove and return the top frame of this CPU's stack, or NULL when the stack
+ * is empty. The returned pointer refers to the slot inside the per-CPU array,
+ * so it is only meaningful until the next push on this CPU overwrites it.
+ */
+static __always_inline struct hyp_ftrace_stack_frame *hyp_ftrace_func_pop(void)
+{
+	struct hyp_ftrace_stack *stack = this_cpu_ptr(&__ftrace_saved_frames);
+
+	/*
+	 * If in _pop(), then _push() has run on this CPU. No need for more
+	 * memory ordering.
+	 */
+
+	if (stack->idx < 0)
+		return NULL;
+
+	/* Post-decrement: hand out the current top, then lower the index */
+	return &stack->frames[stack->idx--];
+}
+
+/*
+ * Function-entry hook. @ip is shifted by hyp_kern_offsets[@offset_idx] and
+ * saved together with @parent on the per-CPU stack.
+ *
+ * Returns the address of __hyp_ftrace_ret_tramp when the frame could be
+ * saved, otherwise @parent unchanged.
+ * NOTE(review): presumably called from __hyp_ftrace_tramp, which would use
+ * the returned value as the new return address -- confirm in the trampoline.
+ */
+unsigned long __hyp_ftrace_trace(unsigned long ip, unsigned long parent,
+				 unsigned long offset_idx)
+{
+	unsigned long func = __kern_addr(offset_idx, ip);
+	unsigned long parent_offset_idx;
+
+	/* When modules are called from core */
+	/* NOTE(review): parent_offset_idx is assigned but never read in this function. */
+	parent_offset_idx = parent > (unsigned long)__hyp_text_start ? 
0 : offset_idx;
+
+	/* Only install the trampoline if we can revert to the original parent */
+	if (hyp_ftrace_func_push(func, parent))
+		return (unsigned long)__hyp_ftrace_ret_tramp;
+
+	return parent;
+}
+
+/*
+ * Function-exit hook: pop the frame saved by __hyp_ftrace_trace() and return
+ * the original return address stored in it. An empty stack is a BUG.
+ */
+unsigned long __hyp_ftrace_ret_trace(void)
+{
+	struct hyp_ftrace_stack_frame *frame = hyp_ftrace_func_pop();
+
+	BUG_ON(!frame);
+
+	return frame->ret;
+}
+
+/* Drop every frame still saved on this CPU's stack. */
+void hyp_ftrace_ret_flush(void)
+{
+	struct hyp_ftrace_stack_frame *frame = hyp_ftrace_func_pop();
+
+	while (frame)
+		frame = hyp_ftrace_func_pop();
+}
+
+/*
+ * hyp_ftrace_patch() callback: build a 64-bit MOVZ into x10 whose 16-bit
+ * immediate (instruction bits [20:5]) is the offset slot index passed in
+ * @args. @func and @ip are unused. Always returns 0.
+ */
+static int __get_offset_idx_ins(unsigned long *func, unsigned long ip, u32 *insn,
+				void *args)
+{
+	unsigned long idx = (unsigned long)args;
+	u32 imm, mask = (BIT(16) - 1) << 5;
+
+	imm = (idx << 5) & mask;
+
+	*insn = aarch64_insn_get_movz_value();
+	*insn |= BIT(31);		/* 64-bits variant */
+	*insn |= 10;			/* x10 */
+	*insn &= ~mask;
+	*insn |= imm;
+	*insn = cpu_to_le32(*insn);
+
+	return 0;
+}
+
+/*
+ * Build a BL instruction located at @ip that branches to @tramp.
+ *
+ * Returns 0 and stores the little-endian encoding in @insn, or -ERANGE when
+ * @tramp cannot be reached from @ip with a single BL.
+ */
+static int __get_enable_ins(unsigned long ip, u32 *insn, void *tramp)
+{
+	u32 imm, mask;
+	long delta;
+
+	delta = (long)tramp - (long)ip;
+
+	/*
+	 * BL carries a signed 26-bit word offset, so the reachable byte range
+	 * is [-128M, 128M). An offset of exactly +128M would overflow into the
+	 * sign bit and silently encode a branch to -128M.
+	 */
+	if (delta < -SZ_128M || delta >= SZ_128M)
+		return -ERANGE;
+
+	mask = BIT(26) - 1;
+	imm = (delta >> 2) & mask;
+
+	*insn = aarch64_insn_get_bl_value() & ~(mask);
+	*insn |= imm;
+	*insn = cpu_to_le32(*insn);
+
+	return 0;
+}
+
+/* Entry encoding: bit 0 flags "enabled", the remaining bits hold the address. */
+#define funcs_pg_enabled(func)	((func) & 0x1)
+#define funcs_pg_func(func)	((func) & ~BIT(0))
+/* True when *(func) is 0 or (func) lies within the last 8 bytes of its page. */
+#define funcs_pg_is_end(func)						\
+({									\
+	(!(*(func)) ||							\
+	 ((PAGE_ALIGN((unsigned long)(func) + 1) - (unsigned long)(func)) <= 8)); \
+})
+
+/*
+ * During init the kernel can notify a function needs to be enabled. This is
+ * relying on the same encoding as the func_pg. 
+ */
+#define get_func(func)		funcs_pg_func(func)
+
+/*
+ * hyp_ftrace_patch() callback used at setup time: emit the BL to @tramp only
+ * for entries whose "enabled" bit is set.
+ *
+ * Returns 0 when @insn must be written. Any non-zero value (1 for a disabled
+ * entry, a negative errno from __get_enable_ins()) makes the caller leave the
+ * instruction untouched.
+ */
+static int __get_enable_disable_ins_early(unsigned long *func, unsigned long ip,
+					  u32 *insn, void *tramp)
+{
+	if (funcs_pg_enabled(*func))
+		return __get_enable_ins(ip, insn, tramp);
+
+	/* Nothing else to do */
+	return 1;
+}
+
+/*
+ * Physical address backing the hypervisor VA @addr: addresses at or above
+ * __hyp_text_start go through __hyp_pa(), anything below is resolved with
+ * __pkvm_private_range_pa().
+ * NOTE(review): defined with external linkage but no prototype is visible
+ * here -- confirm whether it should be static.
+ */
+phys_addr_t __get_phys(unsigned long addr)
+{
+	if (addr >= (unsigned long)__hyp_text_start)
+		return __hyp_pa(addr);
+
+	return __pkvm_private_range_pa((void *)addr);
+}
+
+/* Entry value marking a function that must not be patched. */
+#define HYP_FTRACE_SKIP_FUNC	(-1ULL)
+
+/*
+ * Walk the entries in [@funcs, @funcs_end) and rewrite one instruction per
+ * entry.
+ *
+ * @func_offset: byte offset added to each entry's address to locate the
+ *               instruction to rewrite.
+ * @get_ins:     produces the new instruction; a non-zero return leaves the
+ *               current instruction in place.
+ * @args:        opaque cookie forwarded to @get_ins.
+ *
+ * The walk stops at the first zero entry; HYP_FTRACE_SKIP_FUNC entries are
+ * ignored. Instructions are written through the hyp fixmap, one page mapped
+ * at a time.
+ */
+static void hyp_ftrace_patch(unsigned long *funcs, unsigned long *funcs_end,
+			     size_t func_offset,
+			     int (*get_ins)(unsigned long *func, unsigned long ip,
+					    u32 *insn, void *args),
+			     void *args)
+{
+	unsigned long prev_ip = 0;
+	void *map = NULL;
+
+	while (funcs < funcs_end) {
+		unsigned long ip;
+		u32 insn;
+
+		if (!*funcs)
+			break;
+
+		if (*funcs == HYP_FTRACE_SKIP_FUNC)
+			goto next;
+
+		ip = get_func(*funcs) + func_offset;
+
+		/*
+		 * Keep the current fixmap mapping while @ip stays in the page
+		 * of the previously patched instruction. Comparing the pages
+		 * directly never reads prev_ip before it has been set (map is
+		 * NULL on the first pass) and stays correct if the entries are
+		 * not in ascending order.
+		 */
+		if (!map) {
+			map = hyp_fixmap_map(__get_phys(ip));
+		} else if (PAGE_ALIGN_DOWN(ip) != PAGE_ALIGN_DOWN(prev_ip)) {
+			hyp_fixmap_unmap();
+			map = hyp_fixmap_map(__get_phys(ip));
+		} else {
+			map = (void *)PAGE_ALIGN_DOWN((unsigned long)map) +
+				offset_in_page(ip);
+		}
+
+		prev_ip = ip;
+
+		if (get_ins(funcs, ip, &insn, args))
+			goto next;
+
+		WRITE_ONCE(*(u32 *)map, insn);
+
+		/* Make the new instruction visible to instruction fetch */
+		caches_clean_inval_pou((unsigned long)map,
+				       (unsigned long)map + AARCH64_INSN_SIZE);
+next:
+		funcs++;
+	}
+
+	if (map)
+		hyp_fixmap_unmap();
+}
+
+/*
+ * Register the patchable entries in [@funcs, @funcs_end).
+ *
+ * @hyp_kern_offset is stored in the first free hyp_kern_offsets[] slot, then
+ * two passes patch every entry: the instruction at +AARCH64_INSN_SIZE gets
+ * the slot index (see __get_offset_idx_ins()), the one at
+ * +2 * AARCH64_INSN_SIZE gets the branch to @tramp when the entry is flagged
+ * as enabled.
+ *
+ * Returns the slot index, or -ENOMEM when every slot is in use.
+ * NOTE(review): a slot is considered free when it holds 0, so registering a
+ * zero @hyp_kern_offset leaves its slot reusable -- confirm this cannot
+ * happen.
+ */
+int hyp_ftrace_setup(unsigned long *funcs, unsigned long *funcs_end,
+		     unsigned long hyp_kern_offset, void *tramp)
+{
+	unsigned long idx;
+
+	for (idx = 0; idx < HYP_FTRACE_MAX_OFFSETS; idx++) {
+		if (!hyp_kern_offsets[idx])
+			break;
+	}
+
+	if (idx >= HYP_FTRACE_MAX_OFFSETS)
+		return -ENOMEM;
+
+	hyp_kern_offsets[idx] = hyp_kern_offset;
+
+	hyp_ftrace_patch(funcs, funcs_end, AARCH64_INSN_SIZE,
+			 __get_offset_idx_ins, (void *)idx);
+
+	hyp_ftrace_patch(funcs, funcs_end, 2 * 
AARCH64_INSN_SIZE,
+			 __get_enable_disable_ins_early, tramp);
+
+	return idx;
+}
+
+extern unsigned long __hyp_patchable_function_entries_start[];
+extern unsigned long __hyp_patchable_function_entries_end[];
+
+/*
+ * Written by the host in hyp_ftrace_init() (hyp_events.c) with the kernel
+ * address of __hyp_text_start.
+ */
+unsigned long __hyp_text_start_kern;
+
+/*
+ * Set up function tracing for the core hypervisor text: empty every per-CPU
+ * frame stack, then register the core patchable entries with the offset
+ * between the kernel and hypervisor views of __hyp_text_start and
+ * __hyp_ftrace_tramp as trampoline.
+ * The value returned by hyp_ftrace_setup() is discarded.
+ */
+void hyp_ftrace_setup_core(void)
+{
+	hyp_ftrace_func_reset();
+
+	hyp_ftrace_setup(__hyp_patchable_function_entries_start,
+			 __hyp_patchable_function_entries_end,
+			 __hyp_text_start_kern - (unsigned long)__hyp_text_start,
+			 __hyp_ftrace_tramp);
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index aa232fe33c33..a56f98faed00 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -249,6 +249,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
 	pkvm_psci_notify(PKVM_PSCI_CPU_ENTRY, host_ctxt);
 
 	__hyp_exit();
+	hyp_ftrace_ret_flush();
 	__host_enter(host_ctxt);
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 729c37bce0bc..ae16d88cb30b 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -19,6 +19,7 @@
 #include
 #include
 #include
+#include
 #include
 
 unsigned long hyp_nr_cpus;
@@ -371,6 +372,8 @@ void __noreturn __pkvm_init_finalise(void)
 	if (ret)
 		goto out;
 
+	hyp_ftrace_setup_core();
+
 	ret = fix_host_ownership();
 	if (ret)
 		goto out;
diff --git a/arch/arm64/kvm/hyp_events.c b/arch/arm64/kvm/hyp_events.c
index 090eb40ad722..3972da00d3ff 100644
--- a/arch/arm64/kvm/hyp_events.c
+++ b/arch/arm64/kvm/hyp_events.c
@@ -3,10 +3,13 @@
  * Copyright (C) 2023 Google LLC
  */
 
-#include
+#include
 #include
+#include
 #include
+#include
+#include
 #include
 
 #include "hyp_trace.h"
 
@@ -98,6 +101,89 @@ static const char *hyp_printk_fmt_from_id(u8 fmt_id)
 	return fmt ? 
fmt->fmt : "Unknown Format";
 }
 
+#ifdef CONFIG_PROTECTED_NVHE_FTRACE
+extern unsigned long __hyp_patchable_function_entries_start[];
+extern unsigned long __hyp_patchable_function_entries_end[];
+extern unsigned long kvm_nvhe_sym(__hyp_text_start_kern);
+
+/*
+ * Replace the NOP found at @addr with a 64-bit register move involving x9 and
+ * LR (presumably "mov x9, lr" -- confirm the dst/src order of
+ * aarch64_insn_gen_move_reg()).
+ *
+ * Returns 0 on success, -EFAULT when the instruction cannot be read, -EINVAL
+ * when it is not a NOP, -EPERM when patching fails.
+ */
+static int hyp_ftrace_init_lr_ins(unsigned long addr)
+{
+	u32 old, new;
+
+	if (aarch64_insn_read((void *)addr, &old))
+		return -EFAULT;
+
+	/* Only ever overwrite a NOP */
+	if (old != aarch64_insn_gen_nop())
+		return -EINVAL;
+
+	new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9,
+					AARCH64_INSN_REG_LR,
+					AARCH64_INSN_VARIANT_64BIT);
+	if (aarch64_insn_patch_text_nosync((void *)addr, new))
+		return -EPERM;
+
+	return 0;
+}
+
+/* Instructions are word-aligned, let's repurpose the LSB */
+#define func_enable(func)	((func) | 0x1)
+
+/* Entry value marking a function that must not be patched. */
+#define HYP_FTRACE_SKIP_FUNC	(-1ULL)
+
+/*
+ * Prepare the entries in [@funcs, @funcs_end) before they are handed to the
+ * hypervisor. The walk stops at the first zero entry.
+ *
+ * Each entry is translated with @hyp_kern_offset and then either:
+ *  - overwritten with HYP_FTRACE_SKIP_FUNC when @clear is set, when its
+ *    symbol name starts with "__kvm_nvhe_$", or when patching its first
+ *    instruction fails (a warning is printed in that last case); or
+ *  - patched by hyp_ftrace_init_lr_ins() and flagged as enabled (LSB set).
+ */
+static void hyp_ftrace_funcs_init(unsigned long *funcs, unsigned long *funcs_end,
+				  unsigned long hyp_kern_offset, bool clear)
+{
+	unsigned long *func;
+	int ret;
+
+	func = funcs;
+	while (func < funcs_end) {
+		unsigned long kern_addr = *func + hyp_kern_offset;
+		char sym[KSYM_SYMBOL_LEN];
+
+		if (!*func)
+			break;
+
+		if (clear)
+			goto skip;
+
+		/* 12 == strlen("__kvm_nvhe_$") */
+		sprint_symbol_no_offset(sym, kern_addr);
+		if (!strncmp(sym, "__kvm_nvhe_$", 12))
+			goto skip;
+
+		ret = hyp_ftrace_init_lr_ins(kern_addr);
+		if (ret) {
+			pr_warn("Failed to patch %ps (%d)\n", (void *)kern_addr, ret);
+			goto skip;
+		}
+
+		*func = func_enable(*func);
+		goto next;
+
+skip:
+		*func = HYP_FTRACE_SKIP_FUNC;
+next:
+		func++;
+	}
+}
+
+/*
+ * Host-side setup: process the core hypervisor's patchable function entries,
+ * then publish the kernel address of __hyp_text_start to the hypervisor
+ * through __hyp_text_start_kern.
+ */
+static void hyp_ftrace_init(void)
+{
+	unsigned long hyp_base;
+
+	hyp_base = (unsigned long)kern_hyp_va(lm_alias((unsigned long)__hyp_text_start));
+
+	hyp_ftrace_funcs_init(__hyp_patchable_function_entries_start,
+			      __hyp_patchable_function_entries_end,
+			      (unsigned long)__hyp_text_start - hyp_base, false);
+
+	/* For the hypervisor to compute its hyp_kern_offset */
+	kvm_nvhe_sym(__hyp_text_start_kern) = (unsigned long)__hyp_text_start;
+}
+#else
+static void hyp_ftrace_init(void) { }
+#endif
+
extern struct hyp_event __hyp_events_start[]; extern struct hyp_event __hyp_events_end[]; @@ -458,6 +544,7 @@ int hyp_trace_init_events(void) int nr_events = nr_entries(__hyp_events_start, __hyp_events_end); int nr_event_ids = nr_entries(__hyp_event_ids_start, __hyp_event_ids_end); int nr_printk_fmts = nr_entries(__hyp_printk_fmts_start, __hyp_printk_fmts_end); + int ret; /* __hyp_printk event only supports U8_MAX different formats */ WARN_ON(nr_printk_fmts > U8_MAX); @@ -467,8 +554,14 @@ int hyp_trace_init_events(void) if (WARN(nr_events != nr_event_ids, "Too many trace_hyp_printk()!")) return -EINVAL; - return hyp_event_table_init(__hyp_events_start, __hyp_event_ids_start, - nr_events); + ret = hyp_event_table_init(__hyp_events_start, __hyp_event_ids_start, + nr_events); + if (ret) + return ret; + + hyp_ftrace_init(); + + return 0; } int hyp_trace_init_mod_events(struct hyp_event *event,