From e5a5778f97569eaad67f9b89f20e19eb2adf76c9 Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Tue, 1 Oct 2024 15:12:26 +0100 Subject: [PATCH] ANDROID: KVM: arm64: Add Ftrace patching for pKVM hyp Ftrace for the pKVM hypervisor needs 3 instructions: 1. mov x9 LR # Setup in the kernel 2. mov x10 offset_idx # Setup in the hypervisor 3. bl trampoline # Enablement in the hypervisor The first instruction allows saving the parent address of the traced function. The second selects which hyp-to-kernel VA conversion to apply (useful for modules). The last one is the actual jump to the ftrace trampoline. Functions are patched according to the __patchable_function_entries ELF section which stores the list of traceable functions. In this list, the least-significant bit of an entry can be set to indicate ftrace must be enabled for that function. At the moment all functions are enabled by default. Bug: 357781595 Change-Id: Ia946b6d9c3d739fd19ff5f496f99fbc611972078 Signed-off-by: Vincent Donnefort --- arch/arm64/kvm/hyp/include/nvhe/trace/trace.h | 15 + arch/arm64/kvm/hyp/nvhe/Makefile | 1 + arch/arm64/kvm/hyp/nvhe/ftrace.c | 284 ++++++++++++++++++ arch/arm64/kvm/hyp/nvhe/psci-relay.c | 1 + arch/arm64/kvm/hyp/nvhe/setup.c | 3 + arch/arm64/kvm/hyp_events.c | 99 +++++- 6 files changed, 400 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/kvm/hyp/nvhe/ftrace.c diff --git a/arch/arm64/kvm/hyp/include/nvhe/trace/trace.h b/arch/arm64/kvm/hyp/include/nvhe/trace/trace.h index 27127cf76725..a4774f86116c 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/trace/trace.h +++ b/arch/arm64/kvm/hyp/include/nvhe/trace/trace.h @@ -77,7 +77,19 @@ do { \ #define trace_hyp_printk(fmt, ...) 
\ __trace_hyp_printk_N(fmt, __VA_ARGS__) + +#ifdef CONFIG_PROTECTED_NVHE_FTRACE +void hyp_ftrace_setup_core(void); +int hyp_ftrace_setup(unsigned long *funcs, unsigned long *funcs_end, + unsigned long hyp_kern_offset, void *tramp); +void hyp_ftrace_ret_flush(void); #else +static inline void hyp_ftrace_setup_core(void) { } +static inline void hyp_ftrace_ret_flush(void) { } +static inline int hyp_ftrace_setup(unsigned long *funcs, unsigned long *funcs_end, + unsigned long hyp_kern_offset, void *tramp) { return 0; } +#endif /* CONFIG_PROTECTED_NVHE_FTRACE */ +#else /* CONFIG_TRACING */ static inline void *tracing_reserve_entry(unsigned long length) { return NULL; } static inline void tracing_commit_entry(void) { } static inline int register_hyp_event_ids(void *event_ids, size_t nr_events) @@ -97,5 +109,8 @@ static inline int __pkvm_reset_tracing(unsigned int cpu) { return -ENODEV; } static inline int __pkvm_swap_reader_tracing(unsigned int cpu) { return -ENODEV; } static inline int __pkvm_enable_event(unsigned short id, bool enable) { return -ENODEV; } #define trace_hyp_printk(fmt, ...) 
+ +static inline void hyp_ftrace_setup_core(void) { } +static inline void hyp_ftrace_ret_flush(void) { } #endif #endif diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index bd1fb2453999..ffd00fb07451 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -13,6 +13,7 @@ hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o hyp-obj-$(CONFIG_TRACING) += clock.o events.o trace.o +hyp-obj-$(CONFIG_PROTECTED_NVHE_FTRACE) += ftrace.o hyp-obj-$(CONFIG_MODULES) += modules.o hyp-obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/ftrace.c b/arch/arm64/kvm/hyp/nvhe/ftrace.c new file mode 100644 index 000000000000..992b5a30e404 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/ftrace.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright (C) 2025 Google LLC + * Author: Vincent Donnefort + */ + +#include + +#include +#include + +#include + +#define HYP_FTRACE_MAX_OFFSETS 17 /* MAX_MOD_EVENTS + 1 */ +#define HYP_FTRACE_MAX_DEPTH 32 + +extern unsigned long hyp_nr_cpus; + +extern void __hyp_ftrace_tramp(void); +extern void __hyp_ftrace_ret_tramp(void); + +static unsigned long hyp_kern_offsets[HYP_FTRACE_MAX_OFFSETS]; + +static unsigned long __kern_addr(unsigned long offset_idx, unsigned long addr) +{ + return addr + hyp_kern_offsets[offset_idx]; +} + +struct hyp_ftrace_stack_frame { + unsigned long func; + unsigned long ret; +}; + +struct hyp_ftrace_stack { + int idx; + struct hyp_ftrace_stack_frame frames[HYP_FTRACE_MAX_DEPTH]; +}; + +static DEFINE_PER_CPU(struct hyp_ftrace_stack, __ftrace_saved_frames); + +static void hyp_ftrace_func_reset(void) +{ + unsigned int cpu; + + for (cpu = 0; cpu < hyp_nr_cpus; cpu++) { + struct hyp_ftrace_stack *stack; + + stack = per_cpu_ptr(&__ftrace_saved_frames, cpu); + stack->idx = -1; + } + + /* + * Make sure the stack init is 
observed by all CPUs before patching the + * code. Paired with smp_load_acquire() in hyp_ftrace_func_push(). + */ + smp_mb(); +} + +static __always_inline bool hyp_ftrace_func_push(unsigned long func, unsigned long ret) +{ + struct hyp_ftrace_stack *stack = this_cpu_ptr(&__ftrace_saved_frames); + int idx = smp_load_acquire(&stack->idx); + + if (idx >= (HYP_FTRACE_MAX_DEPTH - 1)) + return false; + + idx++; + stack->frames[idx].func = func; + stack->frames[idx].ret = ret; + stack->idx = idx; + + return true; +} + +static __always_inline struct hyp_ftrace_stack_frame *hyp_ftrace_func_pop(void) +{ + struct hyp_ftrace_stack *stack = this_cpu_ptr(&__ftrace_saved_frames); + + /* + * If in _pop(), then _push() has run on this CPU. No need for more + * memory ordering. + */ + + if (stack->idx < 0) + return NULL; + + return &stack->frames[stack->idx--]; +} + +unsigned long __hyp_ftrace_trace(unsigned long ip, unsigned long parent, + unsigned long offset_idx) +{ + unsigned long func = __kern_addr(offset_idx, ip); + unsigned long parent_offset_idx; + + /* When modules are called from core */ + parent_offset_idx = parent > (unsigned long)__hyp_text_start ? 
0 : offset_idx; + + /* Only install the trampoline if we can revert to the original parent */ + if (hyp_ftrace_func_push(func, parent)) + return (unsigned long)__hyp_ftrace_ret_tramp; + + return parent; +} + +unsigned long __hyp_ftrace_ret_trace(void) +{ + struct hyp_ftrace_stack_frame *frame = hyp_ftrace_func_pop(); + + BUG_ON(!frame); + + return frame->ret; +} + +void hyp_ftrace_ret_flush(void) +{ + struct hyp_ftrace_stack_frame *frame = hyp_ftrace_func_pop(); + + while (frame) + frame = hyp_ftrace_func_pop(); +} + +static int __get_offset_idx_ins(unsigned long *func, unsigned long ip, u32 *insn, + void *args) +{ + unsigned long idx = (unsigned long)args; + u32 imm, mask = (BIT(16) - 1) << 5; + + imm = (idx << 5) & mask; + + *insn = aarch64_insn_get_movz_value(); + *insn |= BIT(31); /* 64-bits variant */ + *insn |= 10; /* x10 */ + *insn &= ~mask; + *insn |= imm; + *insn = cpu_to_le32(*insn); + + return 0; +} + +static int __get_enable_ins(unsigned long ip, u32 *insn, void *tramp) +{ + u32 imm, mask; + long delta; + + delta = (long)tramp - (long)ip; + + if (delta >= SZ_128M || delta < -SZ_128M) + return -ERANGE; /* BL reaches [-128M, 128M) only */ + + mask = BIT(26) - 1; + imm = (delta >> 2) & mask; + + *insn = aarch64_insn_get_bl_value() & ~(mask); + *insn |= imm; + *insn = cpu_to_le32(*insn); + + return 0; +} + +#define funcs_pg_enabled(func) ((func) & 0x1) +#define funcs_pg_func(func) ((func) & ~BIT(0)) +#define funcs_pg_is_end(func) \ +({ \ + (!(*(func)) || \ + ((PAGE_ALIGN((unsigned long)(func) + 1) - (unsigned long)(func)) <= 8)); \ +}) + +/* + * During init the kernel can notify a function needs to be enabled. This is + * relying on the same encoding as the func_pg. 
+ */ +#define get_func(func) funcs_pg_func(func) + +static int __get_enable_disable_ins_early(unsigned long *func, unsigned long ip, + u32 *insn, void *tramp) +{ + if (funcs_pg_enabled(*func)) + return __get_enable_ins(ip, insn, tramp); + + /* Nothing else to do */ + return 1; +} + +phys_addr_t __get_phys(unsigned long addr) +{ + if (addr >= (unsigned long)__hyp_text_start) + return __hyp_pa(addr); + + return __pkvm_private_range_pa((void *)addr); +} + +#define HYP_FTRACE_SKIP_FUNC (-1ULL) + +static void hyp_ftrace_patch(unsigned long *funcs, unsigned long *funcs_end, + size_t func_offset, + int (*get_ins)(unsigned long *func, unsigned long ip, + u32 *insn, void *args), + void *args) +{ + unsigned long prev_ip = 0; /* delta is only used once map is set */ + void *map = NULL; + + while (funcs < funcs_end) { + unsigned long ip; + size_t delta; + u32 insn; + + if (!*funcs) + break; + + if (*funcs == HYP_FTRACE_SKIP_FUNC) + goto next; + + ip = get_func(*funcs) + func_offset; + delta = ip - prev_ip; + + if (!map) { + map = hyp_fixmap_map(__get_phys(ip)); + } else if ((unsigned long)(map + delta) >= + PAGE_ALIGN((unsigned long)map + 4)) { + hyp_fixmap_unmap(); + map = hyp_fixmap_map(__get_phys(ip)); + } else { + map = (void *)PAGE_ALIGN_DOWN((unsigned long)map) + + offset_in_page(ip); + } + + prev_ip = ip; + + if (get_ins(funcs, ip, &insn, args)) + goto next; + + WRITE_ONCE(*(u32 *)map, insn); + + caches_clean_inval_pou((unsigned long)map, + (unsigned long)map + AARCH64_INSN_SIZE); +next: + funcs++; + } + + if (map) + hyp_fixmap_unmap(); +} + +int hyp_ftrace_setup(unsigned long *funcs, unsigned long *funcs_end, + unsigned long hyp_kern_offset, void *tramp) +{ + unsigned long idx; + + for (idx = 0; idx < HYP_FTRACE_MAX_OFFSETS; idx++) { + if (!hyp_kern_offsets[idx]) + break; + } + + if (idx >= HYP_FTRACE_MAX_OFFSETS) + return -ENOMEM; + + hyp_kern_offsets[idx] = hyp_kern_offset; + + hyp_ftrace_patch(funcs, funcs_end, AARCH64_INSN_SIZE, + __get_offset_idx_ins, (void *)idx); + + hyp_ftrace_patch(funcs, funcs_end, 2 * 
AARCH64_INSN_SIZE, + __get_enable_disable_ins_early, tramp); + + return idx; +} + +extern unsigned long __hyp_patchable_function_entries_start[]; +extern unsigned long __hyp_patchable_function_entries_end[]; + +unsigned long __hyp_text_start_kern; + +void hyp_ftrace_setup_core(void) +{ + hyp_ftrace_func_reset(); + + hyp_ftrace_setup(__hyp_patchable_function_entries_start, + __hyp_patchable_function_entries_end, + __hyp_text_start_kern - (unsigned long)__hyp_text_start, + __hyp_ftrace_tramp); +} diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index aa232fe33c33..a56f98faed00 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -249,6 +249,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on) pkvm_psci_notify(PKVM_PSCI_CPU_ENTRY, host_ctxt); __hyp_exit(); + hyp_ftrace_ret_flush(); __host_enter(host_ctxt); } diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 729c37bce0bc..ae16d88cb30b 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -19,6 +19,7 @@ #include #include #include +#include #include unsigned long hyp_nr_cpus; @@ -371,6 +372,8 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; + hyp_ftrace_setup_core(); + ret = fix_host_ownership(); if (ret) goto out; diff --git a/arch/arm64/kvm/hyp_events.c b/arch/arm64/kvm/hyp_events.c index 090eb40ad722..3972da00d3ff 100644 --- a/arch/arm64/kvm/hyp_events.c +++ b/arch/arm64/kvm/hyp_events.c @@ -3,10 +3,13 @@ * Copyright (C) 2023 Google LLC */ -#include +#include #include +#include #include +#include +#include #include #include "hyp_trace.h" @@ -98,6 +101,89 @@ static const char *hyp_printk_fmt_from_id(u8 fmt_id) return fmt ? 
fmt->fmt : "Unknown Format"; } +#ifdef CONFIG_PROTECTED_NVHE_FTRACE +extern unsigned long __hyp_patchable_function_entries_start[]; +extern unsigned long __hyp_patchable_function_entries_end[]; +extern unsigned long kvm_nvhe_sym(__hyp_text_start_kern); + +static int hyp_ftrace_init_lr_ins(unsigned long addr) +{ + u32 old, new; + + if (aarch64_insn_read((void *)addr, &old)) + return -EFAULT; + + if (old != aarch64_insn_gen_nop()) + return -EINVAL; + + new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9, + AARCH64_INSN_REG_LR, + AARCH64_INSN_VARIANT_64BIT); + if (aarch64_insn_patch_text_nosync((void *)addr, new)) + return -EPERM; + + return 0; +} + +/* Instructions are word-aligned, let's repurpose the LSB */ +#define func_enable(func) ((func) | 0x1) + +#define HYP_FTRACE_SKIP_FUNC (-1ULL) + +static void hyp_ftrace_funcs_init(unsigned long *funcs, unsigned long *funcs_end, + unsigned long hyp_kern_offset, bool clear) +{ + unsigned long *func; + int ret; + + func = funcs; + while (func < funcs_end) { + unsigned long kern_addr = *func + hyp_kern_offset; + char sym[KSYM_SYMBOL_LEN]; + + if (!*func) + break; + + if (clear) + goto skip; + + sprint_symbol_no_offset(sym, kern_addr); + if (!strncmp(sym, "__kvm_nvhe_$", 12)) + goto skip; + + ret = hyp_ftrace_init_lr_ins(kern_addr); + if (ret) { + pr_warn("Failed to patch %ps (%d)\n", (void *)kern_addr, ret); + goto skip; + } + + *func = func_enable(*func); + goto next; + +skip: + *func = HYP_FTRACE_SKIP_FUNC; +next: + func++; + } +} + +static void hyp_ftrace_init(void) +{ + unsigned long hyp_base; + + hyp_base = (unsigned long)kern_hyp_va(lm_alias((unsigned long)__hyp_text_start)); + + hyp_ftrace_funcs_init(__hyp_patchable_function_entries_start, + __hyp_patchable_function_entries_end, + (unsigned long)__hyp_text_start - hyp_base, false); + + /* For the hypervisor to compute its hyp_kern_offset */ + kvm_nvhe_sym(__hyp_text_start_kern) = (unsigned long)__hyp_text_start; +} +#else +static void hyp_ftrace_init(void) { } +#endif + 
extern struct hyp_event __hyp_events_start[]; extern struct hyp_event __hyp_events_end[]; @@ -458,6 +544,7 @@ int hyp_trace_init_events(void) int nr_events = nr_entries(__hyp_events_start, __hyp_events_end); int nr_event_ids = nr_entries(__hyp_event_ids_start, __hyp_event_ids_end); int nr_printk_fmts = nr_entries(__hyp_printk_fmts_start, __hyp_printk_fmts_end); + int ret; /* __hyp_printk event only supports U8_MAX different formats */ WARN_ON(nr_printk_fmts > U8_MAX); @@ -467,8 +554,14 @@ int hyp_trace_init_events(void) if (WARN(nr_events != nr_event_ids, "Too many trace_hyp_printk()!")) return -EINVAL; - return hyp_event_table_init(__hyp_events_start, __hyp_event_ids_start, - nr_events); + ret = hyp_event_table_init(__hyp_events_start, __hyp_event_ids_start, + nr_events); + if (ret) + return ret; + + hyp_ftrace_init(); + + return 0; } int hyp_trace_init_mod_events(struct hyp_event *event,