ANDROID: drivers/vfio: Add VFIO_PKVM_IOMMU

pKVM provides mutual distrust between the host kernel and protected VMs (pVMs). One solution to provide DMA isolation in this model is to move the IOMMU control to the hypervisor and para-virtualize the IOMMU interface for the host and guest kernels (neither of them has direct access to the IOMMU programming interface). In the case of device assignment, the host can't map memory in the IOMMU for the guest kernel (as it is not trusted). So, what mainly needs to be done is to assign a blocking domain when VFIO assigns the device to user space, so it can't issue any DMA, and when the guest takes control it can program the IOMMU through the hypervisor with collapsed translation (IOVA->PA directly). Bug: 357781595 Bug: 348382247 Change-Id: Ie424c54d32f43016465de71f24129fea2fe47e59 Signed-off-by: Mostafa Saleh <smostafa@google.com>
2023-11-13 11:11:58 +00:00
parent 007871cbf7
commit 0ef77a1bee
6 changed files with 136 additions and 0 deletions
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -75,6 +75,16 @@ config VFIO_NOIOMMU

 	  If you don't know what to do here, say N.

+config VFIO_PKVM_IOMMU
+	bool "VFIO pKVM IOMMU"
+	depends on ARM64
+	help
+	  This is needed if you plan to assign devices to pKVM protected virtual
+	  machines. VFIO_PKVM_IOMMU mostly does nothing, as the hypervisor ensures
+	  DMA isolation and provides a guest pvIOMMU interface if configured.
+
+	  If you don't know what to do here, say N.
+
 config VFIO_VIRQFD
 	bool
 	select EVENTFD
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -16,3 +16,4 @@ obj-$(CONFIG_VFIO_PLATFORM_BASE) += platform/
 obj-$(CONFIG_VFIO_MDEV) += mdev/
 obj-$(CONFIG_VFIO_FSL_MC) += fsl-mc/
 obj-$(CONFIG_VFIO_CDX) += cdx/
+obj-$(CONFIG_VFIO_PKVM_IOMMU) += vfio_pkvm_iommu.o
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -450,6 +450,10 @@ ssize_t vfio_platform_read(struct vfio_device *core_vdev,
 	unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos);
 	loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK;

+	/* Protected devices are only readable through mmap */
+	if (core_vdev->protected)
+		return -EINVAL;
+
 	if (index >= vdev->num_regions)
 		return -EINVAL;

@@ -533,6 +537,10 @@ ssize_t vfio_platform_write(struct vfio_device *core_vdev, const char __user *bu
 	unsigned int index = VFIO_PLATFORM_OFFSET_TO_INDEX(*ppos);
 	loff_t off = *ppos & VFIO_PLATFORM_OFFSET_MASK;

+	/* Protected devices are only writable through mmap */
+	if (core_vdev->protected)
+		return -EINVAL;
+
 	if (index >= vdev->num_regions)
 		return -EINVAL;

--- a/drivers/vfio/vfio_pkvm_iommu.c
+++ b/drivers/vfio/vfio_pkvm_iommu.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Google LLC
+ * Author: Mostafa Saleh <smostafa@google.com>
+ *
+ * pKVM provides mutual distrust between host kernel and protected VMs (pVMs).
+ * One solution to provide DMA isolation in this model, is to move the IOMMU
+ * control to the hypervisor and para-virtualize the IOMMU interface for
+ * the host and guest kernels. (none of them have direct access to IOMMU
+ * programming interface).
+ * In the case of device assignment, the host can't map memory for the
+ * guest kernel in the IOMMU (as it is not trusted).
+ * So, the host kernel attaches a blocking domain when VFIO assigns the
+ * device to user space, so it can't issue any DMA, and when the guest
+ * takes control it can program the IOMMU through the hypervisor.
+ * This looks similar to noiommu, with one main difference: group->type is
+ * VFIO_IOMMU, which attaches the groups to a blocking domain.
+ */
+
+#include <linux/module.h>
+#include <linux/vfio.h>
+#include "vfio.h"
+
+static void *pkvm_iommu_open(unsigned long arg)
+{
+	if (arg != VFIO_PKVM_IOMMU)
+		return ERR_PTR(-EINVAL);
+
+	return NULL;
+}
+
+static void pkvm_iommu_release(void *iommu_data)
+{
+}
+
+static long pkvm_iommu_ioctl(void *iommu_data,
+			     unsigned int cmd, unsigned long arg)
+{
+	if (cmd == VFIO_CHECK_EXTENSION)
+		return arg == VFIO_PKVM_IOMMU;
+
+	return -ENOTTY;
+}
+
+static int pkvm_iommu_attach_group(void *iommu_data,
+				   struct iommu_group *iommu_group,
+				   enum vfio_group_type type)
+{
+	/*
+	 * VFIO already calls iommu_group_claim_dma_owner() which attaches
+	 * the group to a blocking domain.
+	 */
+
+	return 0;
+}
+
+static void pkvm_iommu_detach_group(void *iommu_data,
+				    struct iommu_group *iommu_group)
+{
+	/*
+	 * VFIO calls iommu_group_release_dma_owner().
+	 */
+}
+
+static void pkvm_iommu_register_device(void *iommu_data,
+				       struct vfio_device *vdev)
+{
+	vdev->protected = true;
+}
+
+static void pkvm_iommu_unregister_device(void *iommu_data,
+					 struct vfio_device *vdev)
+{
+}
+
+static const struct vfio_iommu_driver_ops pkvm_iommu_ops = {
+	.name			= "vfio-pkvm-iommu",
+	.owner			= THIS_MODULE,
+	.open			= pkvm_iommu_open,
+	.release		= pkvm_iommu_release,
+	.ioctl			= pkvm_iommu_ioctl,
+	.attach_group		= pkvm_iommu_attach_group,
+	.detach_group		= pkvm_iommu_detach_group,
+	.register_device	= pkvm_iommu_register_device,
+	.unregister_device	= pkvm_iommu_unregister_device,
+};
+
+static int __init pkvm_iommu_init(void)
+{
+	return vfio_register_iommu_driver(&pkvm_iommu_ops);
+}
+
+static void __exit pkvm_iommu_exit(void)
+{
+	vfio_unregister_iommu_driver(&pkvm_iommu_ops);
+}
+
+module_init(pkvm_iommu_init);
+module_exit(pkvm_iommu_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("smostafa@google.com");
+MODULE_DESCRIPTION("VFIO IOMMU for pKVM pvIOMMU");
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -77,6 +77,8 @@ struct vfio_device {
 	 */
 	struct dentry *debug_root;
 #endif
+	/* Protected by a more privileged entity (hypervisor). */
+	bool				protected;
 };

 /**
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -56,6 +56,18 @@
 */
 #define VFIO_UPDATE_VADDR		10

+/*
+ * pKVM can control IOMMUs (first-stage) instead of the kernel to enforce
+ * DMA protection for guests.
+ * In this case, pKVM can provide a para-virtualized interface for the kernel
+ * and for guests to program the IOMMU, where it will ensure that no VM can
+ * access other VM data.
+ * This allows the guest to program its own IOMMU, compared to
+ * VFIO_TYPE1v2_IOMMU which programs the IOMMU from the host and leaves the
+ * VM with no control over its DMA.
+ */
+#define VFIO_PKVM_IOMMU			30
+
 /*
 * The IOCTL interface is designed for extensibility by embedding the
 * structure length (argsz) and flags into structures passed between