From 0ef77a1bee1ef820e09e72dd91bfa9b05abd7c95 Mon Sep 17 00:00:00 2001 From: Mostafa Saleh Date: Mon, 13 Nov 2023 11:11:58 +0000 Subject: [PATCH] ANDROID: drivers/vfio: Add VFIO_PKVM_IOMMU pKVM provides mutual distrust between host kernel and protected VMs (pVMs). One solution to provide DMA isolation in this model is to move the IOMMU control to the hypervisor and para-virtualize the IOMMU interface for the host and guest kernels (neither of them has direct access to the IOMMU programming interface). In the case of device assignment, the host can't map memory in the IOMMU for the guest kernel (as it is not trusted). So, what mainly needs to be done is to assign a blocking domain when VFIO assigns the device to user space, so it can't issue any DMA, and when the guest takes control it can program the IOMMU through the hypervisor with collapsed translation (IOVA->PA directly). Bug: 357781595 Bug: 348382247 Change-Id: Ie424c54d32f43016465de71f24129fea2fe47e59 Signed-off-by: Mostafa Saleh --- drivers/vfio/Kconfig | 10 ++ drivers/vfio/Makefile | 1 + drivers/vfio/platform/vfio_platform_common.c | 8 ++ drivers/vfio/vfio_pkvm_iommu.c | 103 +++++++++++++++++++ include/linux/vfio.h | 2 + include/uapi/linux/vfio.h | 12 +++ 6 files changed, 136 insertions(+) create mode 100644 drivers/vfio/vfio_pkvm_iommu.c diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index ceae52fd7586..5d11079a0378 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -75,6 +75,16 @@ config VFIO_NOIOMMU If you don't know what to do here, say N. +config VFIO_PKVM_IOMMU + bool "VFIO pKVM IOMMU" + depends on ARM64 + help + This is needed if you plan to assign devices to pKVM protected virtual + machines. PKVM_IOMMU mostly does nothing, as the hypervisor ensures DMA + isolation and would provide a guest pvIOMMU interface if configured. + + If you don't know what to do here, say N. 
+ config VFIO_VIRQFD bool select EVENTFD diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index b2fc9fb499d8..ab82c6848dd5 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -16,3 +16,4 @@ obj-$(CONFIG_VFIO_PLATFORM_BASE) += platform/ obj-$(CONFIG_VFIO_MDEV) += mdev/ obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/ obj-$(CONFIG_VFIO_CDX) += cdx/ +obj-$(CONFIG_VFIO_PKVM_IOMMU) += vfio_pkvm_iommu.o diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 3bf1043cd795..b0b64b924580 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -450,6 +450,10 @@ ssize_t vfio_platform_read(struct vfio_device *core_vdev, unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos); loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK; + /* Protected devices are only readable through mmap. */ + if (core_vdev->protected) + return -EINVAL; + if (index >= vdev->num_regions) return -EINVAL; @@ -533,6 +537,10 @@ ssize_t vfio_platform_write(struct vfio_device *core_vdev, const char __user *bu unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos); loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK; + /* Protected devices are only writable through mmap. */ + if (core_vdev->protected) + return -EINVAL; + if (index >= vdev->num_regions) return -EINVAL; diff --git a/drivers/vfio/vfio_pkvm_iommu.c b/drivers/vfio/vfio_pkvm_iommu.c new file mode 100644 index 000000000000..38fb5b0d1fd6 --- /dev/null +++ b/drivers/vfio/vfio_pkvm_iommu.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 Google LLC + * Author: Mostafa Saleh + * + * pKVM provides mutual distrust between host kernel and protected VMs (pVMs). + * One solution to provide DMA isolation in this model is to move the IOMMU + * control to the hypervisor and para-virtualize the IOMMU interface for + * the host and guest kernels (neither of them has direct access to the IOMMU + * programming interface). 
+ * In the case of device assignment, the host can't map memory for the + * guest kernel in the IOMMU (as it is not trusted). + * So, the host kernel attaches a blocking domain when VFIO + * assigns the device to user space, so it can't issue any DMA, and + * when the guest takes control it can program the IOMMU through the hypervisor. + * This looks similar to noiommu, with one main difference: + * group->type is VFIO_IOMMU, which attaches the groups to a blocking domain. + */ + +#include +#include +#include "vfio.h" + +static void *pkvm_iommu_open(unsigned long arg) +{ + if (arg != VFIO_PKVM_IOMMU) + return ERR_PTR(-EINVAL); + + return NULL; +} + +static void pkvm_iommu_release(void *iommu_data) +{ +} + +static long pkvm_iommu_ioctl(void *iommu_data, + unsigned int cmd, unsigned long arg) +{ + if (cmd == VFIO_CHECK_EXTENSION) + return arg == VFIO_PKVM_IOMMU; + + return -ENOTTY; +} + +static int pkvm_iommu_attach_group(void *iommu_data, + struct iommu_group *iommu_group, + enum vfio_group_type type) +{ + /* + * VFIO already calls iommu_group_claim_dma_owner() which attaches + * the group to a blocking domain. + */ + + return 0; +} + +static void pkvm_iommu_detach_group(void *iommu_data, + struct iommu_group *iommu_group) +{ + /* + * VFIO calls iommu_group_release_dma_owner(). 
+ */ +} + +static void pkvm_iommu_register_device(void *iommu_data, + struct vfio_device *vdev) +{ + vdev->protected = true; +} + +static void pkvm_iommu_unregister_device(void *iommu_data, + struct vfio_device *vdev) +{ +} + +static const struct vfio_iommu_driver_ops pkvm_iommu_ops = { + .name = "vfio-pkvm-iommu", + .owner = THIS_MODULE, + .open = pkvm_iommu_open, + .release = pkvm_iommu_release, + .ioctl = pkvm_iommu_ioctl, + .attach_group = pkvm_iommu_attach_group, + .detach_group = pkvm_iommu_detach_group, + .register_device = pkvm_iommu_register_device, + .unregister_device = pkvm_iommu_unregister_device, +}; + +static int __init pkvm_iommu_init(void) +{ + return vfio_register_iommu_driver(&pkvm_iommu_ops); +} + +static void __exit pkvm_iommu_exit(void) +{ + vfio_unregister_iommu_driver(&pkvm_iommu_ops); +} + +module_init(pkvm_iommu_init); +module_exit(pkvm_iommu_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("smostafa@google.com"); +MODULE_DESCRIPTION("VFIO IOMMU for pKVM pvIOMMU"); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 861bbeaa3d86..0158a536cccb 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -77,6 +77,8 @@ struct vfio_device { */ struct dentry *debug_root; #endif + /* Protected by a more privileged entity (hypervisor). */ + bool protected; }; /** diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 2b68e6cdf190..7ef291d5196b 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -56,6 +56,18 @@ */ #define VFIO_UPDATE_VADDR 10 +/* + * pKVM can control IOMMUs (first-stage) instead of the kernel to enforce + * DMA protection for guests. + * In this case, pKVM can provide a para-virtualized interface for the kernel + * and for guests to program the IOMMU, where it will ensure that no VM can + * access another VM's data. + * This allows the guest to program its own IOMMU, in contrast to + * VFIO_TYPE1v2_IOMMU, which programs 
the IOMMU from the host and leaves the + * VM with no control over its DMA. + */ +#define VFIO_PKVM_IOMMU 30 + /* * The IOCTL interface is designed for extensibility by embedding the * structure length (argsz) and flags into structures passed between