diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index db48cfb89e00..87cfeb4c43f0 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -81,9 +81,9 @@ int hyp_check_range_owned(u64 addr, u64 size); int __pkvm_install_guest_mmio(struct pkvm_hyp_vcpu *hyp_vcpu, u64 pfn, u64 gfn); int pkvm_get_guest_pa_request(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa, - size_t ipa_size_request, u64 *out_pa, s8 *out_level, - u64 *exit_code); - + size_t ipa_size_request, u64 *out_pa, s8 *out_level); +int pkvm_get_guest_pa_request_use_dma(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa, + size_t ipa_size_request, u64 *out_pa, s8 *level); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot, diff --git a/arch/arm64/kvm/hyp/nvhe/device/device.c b/arch/arm64/kvm/hyp/nvhe/device/device.c index 2a2efa2a2ab4..1bfa4022c96f 100644 --- a/arch/arm64/kvm/hyp/nvhe/device/device.c +++ b/arch/arm64/kvm/hyp/nvhe/device/device.c @@ -281,11 +281,16 @@ bool pkvm_device_request_mmio(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code) goto out_inval; ret = pkvm_get_guest_pa_request(hyp_vcpu, ipa, PAGE_SIZE, - &token, &level, exit_code); - if (ret == -ENOENT) + &token, &level); + if (ret == -ENOENT) { + /* Repeat next time. 
*/ + write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR); + *exit_code = ARM_EXCEPTION_HYP_REQ; return false; - else if (ret) + } + else if (ret) { goto out_inval; + } /* It's expected the address is mapped as page for MMIO */ WARN_ON(level != KVM_PGTABLE_LAST_LEVEL); diff --git a/arch/arm64/kvm/hyp/nvhe/iommu/pviommu.c b/arch/arm64/kvm/hyp/nvhe/iommu/pviommu.c index 84829d024573..b02f6b4cb221 100644 --- a/arch/arm64/kvm/hyp/nvhe/iommu/pviommu.c +++ b/arch/arm64/kvm/hyp/nvhe/iommu/pviommu.c @@ -46,14 +46,14 @@ static void pkvm_guest_iommu_free_id(int domain_id) guest_domains[domain_id / BITS_PER_LONG] &= ~(1UL << (domain_id % BITS_PER_LONG)); } -static bool pkvm_guest_iommu_map(struct pkvm_hyp_vcpu *hyp_vcpu) +/* + * check if vcpu has requested memory before + */ +static bool __need_req(struct kvm_vcpu *vcpu) { - return false; -} + struct kvm_hyp_req *hyp_req = vcpu->arch.hyp_reqs; -static bool pkvm_guest_iommu_unmap(struct pkvm_hyp_vcpu *hyp_vcpu) -{ - return false; + return hyp_req->type != KVM_HYP_LAST_REQ; } static void pkvm_pviommu_hyp_req(u64 *exit_code) @@ -188,6 +188,131 @@ out_ret: return true; } +static int __smccc_prot_linux(u64 prot) +{ + int iommu_prot = 0; + + if (prot & ARM_SMCCC_KVM_PVIOMMU_READ) + iommu_prot |= IOMMU_READ; + if (prot & ARM_SMCCC_KVM_PVIOMMU_WRITE) + iommu_prot |= IOMMU_WRITE; + if (prot & ARM_SMCCC_KVM_PVIOMMU_CACHE) + iommu_prot |= IOMMU_CACHE; + if (prot & ARM_SMCCC_KVM_PVIOMMU_NOEXEC) + iommu_prot |= IOMMU_NOEXEC; + if (prot & ARM_SMCCC_KVM_PVIOMMU_MMIO) + iommu_prot |= IOMMU_MMIO; + if (prot & ARM_SMCCC_KVM_PVIOMMU_PRIV) + iommu_prot |= IOMMU_PRIV; + + return iommu_prot; +} + +static bool pkvm_guest_iommu_map(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code) +{ + size_t mapped, total_mapped = 0; + struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu; + u64 domain = smccc_get_arg2(vcpu); + u64 iova = smccc_get_arg3(vcpu); + u64 ipa = smccc_get_arg4(vcpu); + u64 size = smccc_get_arg5(vcpu); + u64 prot = smccc_get_arg6(vcpu); + u64 paddr; + 
int ret; + s8 level; + u64 smccc_ret = SMCCC_RET_SUCCESS; + + if (!IS_ALIGNED(size, PAGE_SIZE) || + !IS_ALIGNED(ipa, PAGE_SIZE) || + !IS_ALIGNED(iova, PAGE_SIZE)) { + smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0); + return true; + } + + while (size) { + /* + * We need to get the PA and atomically use the page temporarily to avoid + * racing with relinquish. + */ + ret = pkvm_get_guest_pa_request_use_dma(hyp_vcpu, ipa, size, + &paddr, &level); + if (ret == -ENOENT) { + /* + * Pages are not mapped and a request was created, updated the guest + * state and go back to host + */ + goto out_host_request; + } else if (ret) { + smccc_ret = SMCCC_RET_INVALID_PARAMETER; + break; + } + + mapped = kvm_iommu_map_pages(domain, iova, paddr, + PAGE_SIZE, min(size, kvm_granule_size(level)) / PAGE_SIZE, + __smccc_prot_linux(prot)); + WARN_ON(__pkvm_unuse_dma(paddr, kvm_granule_size(level), hyp_vcpu)); + if (!mapped) { + if (!__need_req(vcpu)) { + smccc_ret = SMCCC_RET_INVALID_PARAMETER; + break; + } + /* + * Return back to the host with a request to fill the memcache, + * and also update the guest state with what was mapped, so the + * next time the vcpu runs it can check that not all requested + * memory was mapped, and it would repeat the HVC with the rest + * of the range. 
+ */ + goto out_host_request; + } + + ipa += mapped; + iova += mapped; + total_mapped += mapped; + size -= mapped; + } + + smccc_set_retval(vcpu, smccc_ret, total_mapped, 0, 0); + return true; +out_host_request: + *exit_code = ARM_EXCEPTION_HYP_REQ; + smccc_set_retval(vcpu, SMCCC_RET_SUCCESS, total_mapped, 0, 0); + return false; +} + +static bool pkvm_guest_iommu_unmap(struct pkvm_hyp_vcpu *hyp_vcpu, u64 *exit_code) +{ + struct kvm_vcpu *vcpu = &hyp_vcpu->vcpu; + u64 domain = smccc_get_arg2(vcpu); + u64 iova = smccc_get_arg3(vcpu); + u64 size = smccc_get_arg4(vcpu); + size_t unmapped; + unsigned long ret = SMCCC_RET_SUCCESS; + + if (!IS_ALIGNED(size, PAGE_SIZE) || + !IS_ALIGNED(iova, PAGE_SIZE) || + smccc_get_arg5(vcpu) || + smccc_get_arg6(vcpu)) { + smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0); + return true; + } + + unmapped = kvm_iommu_unmap_pages(domain, iova, PAGE_SIZE, size / PAGE_SIZE); + if (unmapped < size) { + if (!__need_req(vcpu)) { + ret = SMCCC_RET_INVALID_PARAMETER; + } else { + /* See comment in pkvm_guest_iommu_map(). 
*/ + *exit_code = ARM_EXCEPTION_HYP_REQ; + smccc_set_retval(vcpu, SMCCC_RET_SUCCESS, unmapped, 0, 0); + return false; + } + } + + smccc_set_retval(vcpu, ret, unmapped, 0, 0); + return true; +} + bool kvm_handle_pviommu_hvc(struct kvm_vcpu *vcpu, u64 *exit_code) { u64 iommu_op = smccc_get_arg1(vcpu); @@ -209,9 +334,9 @@ bool kvm_handle_pviommu_hvc(struct kvm_vcpu *vcpu, u64 *exit_code) case KVM_PVIOMMU_OP_DETACH_DEV: return pkvm_guest_iommu_detach_dev(hyp_vcpu); case KVM_PVIOMMU_OP_MAP_PAGES: - return pkvm_guest_iommu_map(hyp_vcpu); + return pkvm_guest_iommu_map(hyp_vcpu, exit_code); case KVM_PVIOMMU_OP_UNMAP_PAGES: - return pkvm_guest_iommu_unmap(hyp_vcpu); + return pkvm_guest_iommu_unmap(hyp_vcpu, exit_code); } smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0); diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 47604640c32c..b1d82bdcbe3a 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -1852,6 +1852,60 @@ static void __pkvm_unuse_dma_page(phys_addr_t phys_addr) hyp_page_ref_dec(p); } +static int __pkvm_use_dma_locked(phys_addr_t phys_addr, size_t size, + struct pkvm_hyp_vcpu *hyp_vcpu) +{ + int i; + int ret = 0; + struct kvm_mem_range r; + size_t nr_pages = size >> PAGE_SHIFT; + struct memblock_region *reg = find_mem_range(phys_addr, &r); + + if (WARN_ON(!PAGE_ALIGNED(phys_addr | size)) || !is_in_mem_range(phys_addr + size - 1, &r)) + return -EINVAL; + + /* + * Some differences between handling of RAM and device memory: + * - The hyp vmemmap area for device memory is not backed by physical + * pages in the hyp page tables. + * - However, in some cases modules can donate MMIO, as they can't be + * refcounted, taint them by marking them as shared PKVM_PAGE_TAINTED, and that + * will prevent any future transition. 
*/ + if (!reg) { + enum kvm_pgtable_prot prot; + + if (hyp_vcpu) + return -EINVAL; + + ret = ___host_check_page_state_range(phys_addr, size, + PKVM_PAGE_TAINTED, + reg, false); + if (!ret) + return ret; + ret = ___host_check_page_state_range(phys_addr, size, + PKVM_PAGE_OWNED, + reg, false); + if (ret) + return ret; + prot = pkvm_mkstate(PKVM_HOST_MMIO_PROT, PKVM_PAGE_TAINTED); + ret = host_stage2_idmap_locked(phys_addr, size, prot, false); + } else { + /* For VMs, we know if we reach this point the VM has access to the page. */ + if (!hyp_vcpu) { + ret = ___host_check_page_state_range(phys_addr, size, + PKVM_PAGE_OWNED, reg, false); + if (ret) + return ret; + } + + for (i = 0; i < nr_pages; i++) + __pkvm_use_dma_page(phys_addr + i * PAGE_SIZE); + } + + return ret; +} + /* * __pkvm_use_dma - Mark memory as used for DMA * @phys_addr: physical address of the DMA region @@ -1870,59 +1924,10 @@ static void __pkvm_unuse_dma_page(phys_addr_t phys_addr) */ int __pkvm_use_dma(phys_addr_t phys_addr, size_t size, struct pkvm_hyp_vcpu *hyp_vcpu) { - int i; - int ret = 0; - struct kvm_mem_range r; - size_t nr_pages = size >> PAGE_SHIFT; - struct memblock_region *reg = find_mem_range(phys_addr, &r); - - if (WARN_ON(!PAGE_ALIGNED(phys_addr | size)) || !is_in_mem_range(phys_addr + size - 1, &r)) - return -EINVAL; + int ret; host_lock_component(); - - /* - * Some differences between handling of RAM and device memory: - * - The hyp vmemmap area for device memory is not backed by physical - * pages in the hyp page tables. - * - However, in some cases modules can donate MMIO, as they can't be - * refcounted, taint them by marking them as shared PKVM_PAGE_TAINTED, and that - * will prevent any future transition.
- */ - if (!reg) { - enum kvm_pgtable_prot prot; - - if (hyp_vcpu) { - ret = -EINVAL; - goto out_ret; - } - - ret = ___host_check_page_state_range(phys_addr, size, - PKVM_PAGE_TAINTED, - reg, false); - if (!ret) - goto out_ret; - ret = ___host_check_page_state_range(phys_addr, size, - PKVM_PAGE_OWNED, - reg, false); - if (ret) - goto out_ret; - prot = pkvm_mkstate(PKVM_HOST_MMIO_PROT, PKVM_PAGE_TAINTED); - ret = host_stage2_idmap_locked(phys_addr, size, prot, false); - } else { - /* For VMs, we know if we reach this point the VM has access to the page. */ - if (!hyp_vcpu) { - ret = ___host_check_page_state_range(phys_addr, size, - PKVM_PAGE_OWNED, reg, false); - if (ret) - goto out_ret; - } - - for (i = 0; i < nr_pages; i++) - __pkvm_use_dma_page(phys_addr + i * PAGE_SIZE); - } - -out_ret: + ret = __pkvm_use_dma_locked(phys_addr, size, hyp_vcpu); host_unlock_component(); return ret; } @@ -2733,8 +2738,7 @@ teardown: /* Return PA for an owned guest IPA or request it, and repeat the guest HVC */ int pkvm_get_guest_pa_request(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa, - size_t ipa_size_request, u64 *out_pa, s8 *out_level, - u64 *exit_code) + size_t ipa_size_request, u64 *out_pa, s8 *out_level) { struct kvm_hyp_req *req; kvm_pte_t pte; @@ -2752,9 +2756,6 @@ int pkvm_get_guest_pa_request(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa, req->map.guest_ipa = ipa; req->map.size = ipa_size_request; - *exit_code = ARM_EXCEPTION_HYP_REQ; - /* Repeat next time. 
*/ - write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR); return -ENOENT; } @@ -2767,6 +2768,23 @@ int pkvm_get_guest_pa_request(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa, return 0; } +/* Get a PA and use the page for DMA */ +int pkvm_get_guest_pa_request_use_dma(struct pkvm_hyp_vcpu *hyp_vcpu, u64 ipa, + size_t ipa_size_request, u64 *out_pa, s8 *level) +{ + int ret; + + host_lock_component(); + ret = pkvm_get_guest_pa_request(hyp_vcpu, ipa, ipa_size_request, + out_pa, level); + if (ret) + goto out_ret; + WARN_ON(__pkvm_use_dma_locked(*out_pa, kvm_granule_size(*level), hyp_vcpu)); +out_ret: + host_unlock_component(); + return ret; +} + #ifdef CONFIG_PKVM_SELFTESTS struct pkvm_expected_state { enum pkvm_page_state host; diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 31bc5a81ac90..31e00988404e 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -112,6 +112,13 @@ #define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U #define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU +#define ARM_SMCCC_KVM_PVIOMMU_READ (1 << 0) +#define ARM_SMCCC_KVM_PVIOMMU_WRITE (1 << 1) +#define ARM_SMCCC_KVM_PVIOMMU_CACHE (1 << 2) +#define ARM_SMCCC_KVM_PVIOMMU_NOEXEC (1 << 3) +#define ARM_SMCCC_KVM_PVIOMMU_MMIO (1 << 4) +#define ARM_SMCCC_KVM_PVIOMMU_PRIV (1 << 5) + /* KVM "vendor specific" services */ #define ARM_SMCCC_KVM_FUNC_FEATURES 0 #define ARM_SMCCC_KVM_FUNC_PTP 1