diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 3a6c926cd7ba..c0cdbf23e844 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -728,6 +728,7 @@ struct kvm_hyp_req {
 #define KVM_HYP_LAST_REQ	0
 #define KVM_HYP_REQ_TYPE_MEM	1
 #define KVM_HYP_REQ_TYPE_MAP	2
+#define KVM_HYP_REQ_TYPE_SPLIT	3
 	u8 type;
 	union {
 		struct {
@@ -742,6 +743,10 @@ struct kvm_hyp_req {
 			unsigned long	guest_ipa;
 			size_t		size;
 		} map;
+		struct {
+			unsigned long	guest_ipa;
+			size_t		size;
+		} split;
 	};
 };
 
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index fe589b036b5f..5e5ee0f497c0 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -186,6 +186,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 
 int kvm_handle_guest_abort(struct kvm_vcpu *vcpu);
 int pkvm_mem_abort_range(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, size_t size);
+int __pkvm_pgtable_stage2_split(struct kvm_vcpu *vcpu, phys_addr_t ipa, size_t size);
 
 phys_addr_t kvm_mmu_get_httbr(void);
 phys_addr_t kvm_get_idmap_vector(void);
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index f65e428c1223..4a0f70d0c49d 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -404,6 +404,11 @@ static int handle_hyp_req_map(struct kvm_vcpu *vcpu,
 	return pkvm_mem_abort_range(vcpu, req->map.guest_ipa, req->map.size);
 }
 
+static int handle_hyp_req_split(struct kvm_vcpu *vcpu, struct kvm_hyp_req *req)
+{
+	return __pkvm_pgtable_stage2_split(vcpu, req->split.guest_ipa, req->split.size);
+}
+
 static int handle_hyp_req(struct kvm_vcpu *vcpu)
 {
 	struct kvm_hyp_req *hyp_req = vcpu->arch.hyp_reqs;
@@ -420,6 +425,9 @@ static int handle_hyp_req(struct kvm_vcpu *vcpu)
 		case KVM_HYP_REQ_TYPE_MAP:
 			ret = handle_hyp_req_map(vcpu, hyp_req);
 			break;
+		case KVM_HYP_REQ_TYPE_SPLIT:
+			ret = handle_hyp_req_split(vcpu, hyp_req);
+			break;
 		default:
 			pr_warn("Unknown kvm_hyp_req type: %d\n", hyp_req->type);
 			ret = -EINVAL;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index c996c5569847..0c7177fd050b 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -2019,6 +2019,124 @@ int pkvm_mem_abort_prefault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	return pkvm_mem_abort(vcpu, fault_ipa, size, memslot);
 }
 
+/*
+ * Ask the hypervisor to split the PMD_SIZE guest range at @ipa and replace the huge
+ * kvm_pinned_page tracking it with one order-0 kvm_pinned_page per PAGE_SIZE page.
+ *
+ * Splitting is only expected on the back of a relinquish guest HVC in the pKVM case, while
+ * pkvm_pgtable_stage2_split() can be called with dirty logging.
+ *
+ * Returns 0 on success or when the range is already tracked with order-0 pages, -EINVAL
+ * for a non-pKVM VM or a range that is not exactly one aligned PMD, -EPERM when no pinned
+ * page is tracked at @ipa, -ENOMEM on allocation failure, or the error returned by the
+ * memcache top-up, the page pinning or the hypervisor call.
+ */
+int __pkvm_pgtable_stage2_split(struct kvm_vcpu *vcpu, phys_addr_t ipa, size_t size)
+{
+	struct list_head ppage_prealloc = LIST_HEAD_INIT(ppage_prealloc);
+	struct kvm_hyp_memcache *hyp_memcache = &vcpu->arch.stage2_mc;
+	struct kvm_pinned_page *ppage, *tmp;
+	struct kvm_memory_slot *memslot;
+	struct kvm *kvm = vcpu->kvm;
+	int idx, p, ret, nr_pages;
+	bool pins_consumed = false;
+	struct page **pages;
+	kvm_pfn_t pfn;
+	gfn_t gfn;
+
+	if (WARN_ON(!kvm->arch.pkvm.enabled))
+		return -EINVAL;
+
+	if (!IS_ALIGNED(ipa, PMD_SIZE) || size != PMD_SIZE)
+		return -EINVAL;
+
+	ret = topup_hyp_memcache_account(vcpu->kvm, hyp_memcache, 1, 0);
+	if (ret)
+		return ret;
+
+	/* We already have 1 pin on the Huge Page */
+	nr_pages = (size >> PAGE_SHIFT) - 1;
+	gfn = (ipa >> PAGE_SHIFT) + 1;
+
+	/* Pre-allocate kvm_pinned_page before acquiring the mmu_lock */
+	for (p = 0; p < nr_pages; p++) {
+		ppage = kzalloc(sizeof(*ppage), GFP_KERNEL_ACCOUNT);
+		if (!ppage) {
+			ret = -ENOMEM;
+			goto free_pinned_pages;
+		}
+		list_add(&ppage->list_node, &ppage_prealloc);
+	}
+
+	idx = srcu_read_lock(&vcpu->kvm->srcu);
+	memslot = gfn_to_memslot(vcpu->kvm, gfn);
+	ret = __pkvm_pin_user_pages(kvm, memslot, gfn, nr_pages, &pages);
+	if (ret)
+		goto unlock_srcu;
+
+	write_lock(&kvm->mmu_lock);
+
+	ppage = find_ppage(kvm, ipa);
+	if (!ppage) {
+		ret = -EPERM;
+		goto end;
+	} else if (!ppage->order) {
+		/* Already split: succeed, but the pins taken above must still be dropped */
+		ret = 0;
+		goto end;
+	}
+
+	ret = kvm_call_hyp_nvhe(__pkvm_host_split_guest, ipa >> PAGE_SHIFT, size);
+	if (ret)
+		goto end;
+
+	/* From here on, every pin taken above gets owned by a kvm_pinned_page */
+	pins_consumed = true;
+
+	ppage->order = 0;
+	ppage->pins = 1;
+
+	pfn = page_to_pfn(ppage->page) + 1;
+	ipa = ipa + PAGE_SIZE;
+	for (p = 0; p < nr_pages; p++) {
+		/* Pop a ppage from the pre-allocated list */
+		ppage = list_first_entry(&ppage_prealloc, struct kvm_pinned_page, list_node);
+		list_del_init(&ppage->list_node);
+
+		ppage->page = pfn_to_page(pfn);
+		ppage->ipa = ipa;
+		ppage->order = 0;
+		ppage->pins = 1;
+		insert_ppage(kvm, ppage);
+
+		pfn += 1;
+		ipa += PAGE_SIZE;
+	}
+
+end:
+	write_unlock(&kvm->mmu_lock);
+
+	/*
+	 * Drop the pins unless they were handed over to kvm_pinned_page entries. This
+	 * covers the failure paths and the "already split" early exit, which returns 0.
+	 */
+	if (!pins_consumed)
+		unpin_user_pages(pages, nr_pages);
+	kfree(pages);
+
+unlock_srcu:
+	srcu_read_unlock(&vcpu->kvm->srcu, idx);
+
+free_pinned_pages:
+	/* Free unused pre-allocated kvm_pinned_page */
+	list_for_each_entry_safe(ppage, tmp, &ppage_prealloc, list_node) {
+		list_del(&ppage->list_node);
+		kfree(ppage);
+	}
+
+	return ret;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_s2_trans *nested,
 			  struct kvm_memory_slot *memslot,
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 278a53040c29..da2fb5e24266 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -733,10 +733,10 @@ void pkvm_host_reclaim_page(struct kvm *host_kvm, phys_addr_t ipa)
 	ppage = kvm_pinned_pages_iter_first(&host_kvm->arch.pkvm.pinned_pages,
 					    ipa, ipa + PAGE_SIZE - 1);
 	if (ppage) {
+		WARN_ON_ONCE(ppage->pins != 1);
+
 		if (ppage->pins)
 			ppage->pins--;
-		else
-			WARN_ON(1);
 
 		pins = ppage->pins;
 		if (!pins)