From 322fda0405fecaaa540b0fa90393830aaadaf420 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 5 Oct 2021 00:57:49 +1000 Subject: [PATCH 1/2] KVM: PPC: Book3S HV: H_ENTER filter out reserved HPTE[B] value The HPTE B field is a 2-bit field with values 0b10 and 0b11 reserved. This field is also taken from the HPTE and used when KVM executes TLBIEs to set the B field of those instructions. Disallow the guest setting B to a reserved value with H_ENTER by rejecting it. This is the same approach already taken for rejecting reserved (unsupported) LLP values. This prevents the guest from being able to induce the host to execute TLBIE with reserved values, which is not known to be a problem with current processors but in theory it could prevent the TLBIE from working correctly in a future processor. Signed-off-by: Nicholas Piggin Reviewed-by: Fabiano Rosas Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211004145749.1331331-1-npiggin@gmail.com --- arch/powerpc/include/asm/kvm_book3s_64.h | 4 ++++ arch/powerpc/kvm/book3s_hv_rm_mmu.c | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 19b6942c6969..fff391b9b97b 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -378,6 +378,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, rb |= 1; /* L field */ rb |= r & 0xff000 & ((1ul << a_pgshift) - 1); /* LP field */ } + /* + * This sets both bits of the B field in the PTE. 0b1x values are + * reserved, but those will have been filtered by kvmppc_do_h_enter. + */ rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */ return rb; } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 632b2545072b..2c1f3c6e72d1 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -207,6 +207,15 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, if (kvm_is_radix(kvm)) return H_FUNCTION; + /* + * The HPTE gets used by compute_tlbie_rb() to set TLBIE bits, so + * these functions should work together -- must ensure a guest can not + * cause problems with the TLBIE that KVM executes. + */ + if ((pteh >> HPTE_V_SSIZE_SHIFT) & 0x2) { + /* B=0b1x is a reserved value, disallow it. */ + return H_PARAMETER; + } psize = kvmppc_actual_pgsz(pteh, ptel); if (!psize) return H_PARAMETER; From 235cee162459d96153d63651ce7ff51752528c96 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Thu, 28 Oct 2021 00:21:50 +1000 Subject: [PATCH 2/2] KVM: PPC: Tick accounting should defer vtime accounting 'til after IRQ handling Commit 112665286d08 ("KVM: PPC: Book3S HV: Context tracking exit guest context before enabling irqs") moved guest_exit() into the interrupt protected area to avoid wrong context warning (or worse). The problem is that tick-based time accounting has not yet been updated at this point (because it depends on the timer interrupt firing), so the guest time gets incorrectly accounted to system time. To fix the problem, follow the x86 fix in commit 160457140187 ("Defer vtime accounting 'til after IRQ handling"), and allow host IRQs to run before accounting the guest exit time. In the case vtime accounting is enabled, this is not required because TB is used directly for accounting. Before this patch, with CONFIG_TICK_CPU_ACCOUNTING=y in the host and a guest running a kernel compile, the 'guest' fields of /proc/stat are stuck at zero. With the patch they can be observed increasing roughly as expected. Fixes: e233d54d4d97 ("KVM: booke: use __kvm_guest_exit") Fixes: 112665286d08 ("KVM: PPC: Book3S HV: Context tracking exit guest context before enabling irqs") Cc: stable@vger.kernel.org # 5.12+ Signed-off-by: Laurent Vivier [np: only required for tick accounting, add Book3E fix, tweak changelog] Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20211027142150.3711582-1-npiggin@gmail.com --- arch/powerpc/kvm/book3s_hv.c | 30 ++++++++++++++++++++++++++++-- arch/powerpc/kvm/booke.c | 16 +++++++++++++++- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 2acb1c96cfaf..7b74fc0a986b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3726,7 +3726,20 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc) kvmppc_set_host_core(pcpu); - guest_exit_irqoff(); + context_tracking_guest_exit(); + if (!vtime_accounting_enabled_this_cpu()) { + local_irq_enable(); + /* + * Service IRQs here before vtime_account_guest_exit() so any + * ticks that occurred while running the guest are accounted to + * the guest. If vtime accounting is enabled, accounting uses + * TB rather than ticks, so it can be done without enabling + * interrupts here, which has the problem that it accounts + * interrupt processing overhead to the host. + */ + local_irq_disable(); + } + vtime_account_guest_exit(); local_irq_enable(); @@ -4510,7 +4523,20 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, kvmppc_set_host_core(pcpu); - guest_exit_irqoff(); + context_tracking_guest_exit(); + if (!vtime_accounting_enabled_this_cpu()) { + local_irq_enable(); + /* + * Service IRQs here before vtime_account_guest_exit() so any + * ticks that occurred while running the guest are accounted to + * the guest. If vtime accounting is enabled, accounting uses + * TB rather than ticks, so it can be done without enabling + * interrupts here, which has the problem that it accounts + * interrupt processing overhead to the host. + */ + local_irq_disable(); + } + vtime_account_guest_exit(); local_irq_enable(); diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 977801c83aff..8c15c90dd3a9 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1042,7 +1042,21 @@ int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr) } trace_kvm_exit(exit_nr, vcpu); - guest_exit_irqoff(); + + context_tracking_guest_exit(); + if (!vtime_accounting_enabled_this_cpu()) { + local_irq_enable(); + /* + * Service IRQs here before vtime_account_guest_exit() so any + * ticks that occurred while running the guest are accounted to + * the guest. If vtime accounting is enabled, accounting uses + * TB rather than ticks, so it can be done without enabling + * interrupts here, which has the problem that it accounts + * interrupt processing overhead to the host. + */ + local_irq_disable(); + } + vtime_account_guest_exit(); local_irq_enable();