diff --git a/MAINTAINERS b/MAINTAINERS index c1c697cd6eb5..826e2f38db3a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21625,6 +21625,7 @@ M: Thierry Reding R: Krishna Reddy L: linux-tegra@vger.kernel.org S: Supported +F: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c F: drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c F: drivers/iommu/tegra* diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 9dbb55e745bd..f58d6fc37d30 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -406,6 +406,18 @@ config ARM_SMMU_V3_SVA Say Y here if your system supports SVA extensions such as PCIe PASID and PRI. + +config TEGRA241_CMDQV + bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3" + depends on ARM_SMMU_V3 + help + Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The + CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues + support, except with virtualization capabilities. + + Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same + CMDQ-V extension. + config S390_IOMMU def_bool y if S390 && PCI depends on S390 && PCI diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile index 54feb1ecccad..8dff2bc4c7f3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/Makefile +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile @@ -2,4 +2,5 @@ obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o arm_smmu_v3-objs-y += arm-smmu-v3.o arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o +arm_smmu_v3-objs-$(CONFIG_TEGRA241_CMDQV) += tegra241-cmdqv.o arm_smmu_v3-objs := $(arm_smmu_v3-objs-y) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b31fff4f4741..9b8d4a68daee 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2,7 +2,8 @@ /* * IOMMU API for ARM architected SMMUv3 implementations. * - * Copyright (C) 2015 ARM Limited + * SPDX-FileCopyrightText: Copyright (C) 2015 ARM Limited + * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. All rights reserved. * * Author: Will Deacon * @@ -23,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -345,14 +347,30 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) return 0; } -static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) +static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_ent *ent) { - return &smmu->cmdq; + struct arm_smmu_cmdq *cmdq = NULL; + + if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq) + cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent); + + return cmdq ?: &smmu->cmdq; +} + +static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq) +{ + if (cmdq == &smmu->cmdq) + return false; + + return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV; } static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, - struct arm_smmu_queue *q, u32 prod) + struct arm_smmu_cmdq *cmdq, u32 prod) { + struct arm_smmu_queue *q = &cmdq->q; struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC, }; @@ -367,10 +385,12 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, } arm_smmu_cmdq_build_cmd(cmd, &ent); + if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) + u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS); } -static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, - struct arm_smmu_queue *q) +void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq) { static const char * const cerror_str[] = { [CMDQ_ERR_CERROR_NONE_IDX] = "No error", @@ -378,6 +398,7 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch", [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout", }; + struct arm_smmu_queue *q = &cmdq->q; int i; u64 cmd[CMDQ_ENT_DWORDS]; @@ -420,13 +441,15 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, /* Convert the erroneous command into a CMD_SYNC */ arm_smmu_cmdq_build_cmd(cmd, &cmd_sync); + if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) + u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS); queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); } static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) { - __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q); + __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq); } /* @@ -591,11 +614,11 @@ static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq, /* Wait for the command queue to become non-full */ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { unsigned long flags; struct arm_smmu_queue_poll qp; - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); int ret = 0; /* @@ -626,11 +649,11 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu, * Must be called with the cmdq lock held in some capacity. */ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { int ret = 0; struct arm_smmu_queue_poll qp; - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod)); queue_poll_init(smmu, &qp); @@ -650,10 +673,10 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu, * Must be called with the cmdq lock held in some capacity. */ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { struct arm_smmu_queue_poll qp; - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); u32 prod = llq->prod; int ret = 0; @@ -700,12 +723,14 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu, } static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) - return __arm_smmu_cmdq_poll_until_msi(smmu, llq); + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && + !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) + return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); - return __arm_smmu_cmdq_poll_until_consumed(smmu, llq); + return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq); } static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds, @@ -742,13 +767,13 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds, * CPU will appear before any of the commands from the other CPU. */ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, u64 *cmds, int n, bool sync) { u64 cmd_sync[CMDQ_ENT_DWORDS]; u32 prod; unsigned long flags; bool owner; - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); struct arm_smmu_ll_queue llq, head; int ret = 0; @@ -762,7 +787,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, while (!queue_has_space(&llq, n + sync)) { local_irq_restore(flags); - if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq)) + if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq)) dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); local_irq_save(flags); } @@ -788,7 +813,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n); if (sync) { prod = queue_inc_prod_n(&llq, n); - arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod); + arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod); queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS); /* @@ -838,7 +863,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */ if (sync) { llq.prod = queue_inc_prod_n(&llq, n); - ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq); + ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq); if (ret) { dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n", @@ -873,7 +898,8 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, return -EINVAL; } - return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync); + return arm_smmu_cmdq_issue_cmdlist( + smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync); } static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, @@ -888,21 +914,33 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu, return __arm_smmu_cmdq_issue_cmd(smmu, ent, true); } +static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_batch *cmds, + struct arm_smmu_cmdq_ent *ent) +{ + cmds->num = 0; + cmds->cmdq = arm_smmu_get_cmdq(smmu, ent); +} + static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_batch *cmds, struct arm_smmu_cmdq_ent *cmd) { + bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd); + bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) && + (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC); int index; - if (cmds->num == CMDQ_BATCH_ENTRIES - 1 && - (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) { - arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); - cmds->num = 0; + if (force_sync || unsupported_cmd) { + arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, + cmds->num, true); + arm_smmu_cmdq_batch_init(smmu, cmds, cmd); } if (cmds->num == CMDQ_BATCH_ENTRIES) { - arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false); - cmds->num = 0; + arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, + cmds->num, false); + arm_smmu_cmdq_batch_init(smmu, cmds, cmd); } index = cmds->num * CMDQ_ENT_DWORDS; @@ -918,7 +956,9 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_batch *cmds) { - return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); + return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, + cmds->num, true); + } static int arm_smmu_page_response(struct device *dev, @@ -985,7 +1025,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master, }, }; - cmds.num = 0; + arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd); for (i = 0; i < master->num_streams; i++) { cmd.cfgi.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); @@ -1786,7 +1826,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); - cmds.num = 0; + arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd); for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd); @@ -1800,7 +1840,9 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, { int i; unsigned long flags; - struct arm_smmu_cmdq_ent cmd; + struct arm_smmu_cmdq_ent cmd = { + .opcode = CMDQ_OP_ATC_INV, + }; struct arm_smmu_master *master; struct arm_smmu_cmdq_batch cmds; @@ -1826,7 +1868,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); - cmds.num = 0; + arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd); spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_for_each_entry(master, &smmu_domain->devices, domain_head) { @@ -1903,7 +1945,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, num_pages++; } - cmds.num = 0; + arm_smmu_cmdq_batch_init(smmu, &cmds, cmd); while (iova < end) { if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { @@ -2872,12 +2914,10 @@ static struct iommu_ops arm_smmu_ops = { }; /* Probing and initialisation functions */ -static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, - struct arm_smmu_queue *q, - void __iomem *page, - unsigned long prod_off, - unsigned long cons_off, - size_t dwords, const char *name) +int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, + struct arm_smmu_queue *q, void __iomem *page, + unsigned long prod_off, unsigned long cons_off, + size_t dwords, const char *name) { size_t qsz; @@ -2915,9 +2955,9 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, return 0; } -static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu) +int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq) { - struct arm_smmu_cmdq *cmdq = &smmu->cmdq; unsigned int nents = 1 << cmdq->q.llq.max_n_shift; atomic_set(&cmdq->owner_prod, 0); @@ -2942,7 +2982,7 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) if (ret) return ret; - ret = arm_smmu_cmdq_init(smmu); + ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq); if (ret) return ret; @@ -3088,7 +3128,14 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) if (ret) return ret; - return arm_smmu_init_strtab(smmu); + ret = arm_smmu_init_strtab(smmu); + if (ret) + return ret; + + if (smmu->impl_ops && smmu->impl_ops->init_structures) + return smmu->impl_ops->init_structures(smmu); + + return 0; } static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, @@ -3149,6 +3196,15 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) int ret, nvec = ARM_SMMU_MAX_MSIS; struct device *dev = smmu->dev; + /* Clear the MSI address regs */ + writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); + writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); + + if (smmu->features & ARM_SMMU_FEAT_PRI) + writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); + else + nvec--; + if (!(smmu->features & ARM_SMMU_FEAT_MSI)) return; @@ -3157,9 +3213,6 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) return; } - if (!(smmu->features & ARM_SMMU_FEAT_PRI)) - nvec--; - /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */ ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg); if (ret) { @@ -3221,9 +3274,9 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) } } -static int arm_smmu_reset_irqs(struct arm_smmu_device *smmu) +static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) { - int ret; + int ret, irq; u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; /* Disable IRQs first */ @@ -3234,35 +3287,7 @@ static int arm_smmu_reset_irqs(struct arm_smmu_device *smmu) return ret; } - if (!smmu->combined_irq) { - /* - * Clear the MSI address regs. These registers will be reset - * in arm_smmu_write_msi_msg callback function by irq_domain - * upon a new MSI message. - */ - writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); - writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); - - if (smmu->features & ARM_SMMU_FEAT_PRI) - writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); - } - - if (smmu->features & ARM_SMMU_FEAT_PRI) - irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; - - /* Enable interrupt generation on the SMMU */ - ret = arm_smmu_write_reg_sync(smmu, irqen_flags, - ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK); - if (ret) - dev_warn(smmu->dev, "failed to enable irqs\n"); - - return ret; -} - -static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) -{ - int ret = 0, irq = smmu->combined_irq; - + irq = smmu->combined_irq; if (irq) { /* * Cavium ThunderX2 implementation doesn't support unique irq @@ -3278,7 +3303,16 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) } else arm_smmu_setup_unique_irqs(smmu); - return ret; + if (smmu->features & ARM_SMMU_FEAT_PRI) + irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; + + /* Enable interrupt generation on the SMMU */ + ret = arm_smmu_write_reg_sync(smmu, irqen_flags, + ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK); + if (ret) + dev_warn(smmu->dev, "failed to enable irqs\n"); + + return 0; } static int arm_smmu_device_disable(struct arm_smmu_device *smmu) @@ -3292,7 +3326,7 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu) return ret; } -static int arm_smmu_device_reset(struct arm_smmu_device *smmu) +static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) { int ret; u32 reg, enables; @@ -3400,17 +3434,11 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu) } } - ret = arm_smmu_reset_irqs(smmu); - if (ret) { - dev_err(smmu->dev, "failed to reset irqs\n"); - return ret; - } - if (is_kdump_kernel()) enables &= ~(CR0_EVTQEN | CR0_PRIQEN); /* Enable the SMMU interface, or ensure bypass */ - if (!smmu->bypass || disable_bypass) { + if (!bypass || disable_bypass) { enables |= CR0_SMMUEN; } else { ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT); @@ -3424,6 +3452,14 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu) return ret; } + if (smmu->impl_ops && smmu->impl_ops->device_reset) { + ret = smmu->impl_ops->device_reset(smmu); + if (ret) { + dev_err(smmu->dev, "failed to reset impl\n"); + return ret; + } + } + return 0; } @@ -3685,19 +3721,84 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) return 0; } -#ifdef CONFIG_ACPI -static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) +#ifdef CONFIG_TEGRA241_CMDQV +static void tegra_cmdqv_dt_probe(struct device_node *smmu_node, + struct arm_smmu_device *smmu) { - switch (model) { + struct platform_device *pdev; + struct device_node *np; + + np = of_parse_phandle(smmu_node, "nvidia,cmdqv", 0); + if (!np) + return; + + pdev = of_find_device_by_node(np); + of_node_put(np); + if (!pdev) + return; + + smmu->impl_dev = &pdev->dev; + smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV; + dev_info(smmu->dev, "found companion CMDQV device: %s\n", + dev_name(smmu->impl_dev)); +} +#else +static void tegra_cmdqv_dt_probe(struct device_node *smmu_node, + struct arm_smmu_device *smmu) +{ +} +#endif + +#ifdef CONFIG_ACPI +#ifdef CONFIG_TEGRA241_CMDQV +static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node, + struct arm_smmu_device *smmu) +{ + const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier); + struct acpi_device *adev; + + /* Look for an NVDA200C node whose _UID matches the SMMU node ID */ + adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1); + if (adev) { + /* Tegra241 CMDQV driver is responsible for put_device() */ + smmu->impl_dev = &adev->dev; + smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV; + dev_info(smmu->dev, "found companion CMDQV device: %s\n", + dev_name(smmu->impl_dev)); + } + kfree(uid); +} +#else +static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node, + struct arm_smmu_device *smmu) +{ +} +#endif + +static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node, + struct arm_smmu_device *smmu) +{ + struct acpi_iort_smmu_v3 *iort_smmu = + (struct acpi_iort_smmu_v3 *)node->node_data; + + switch (iort_smmu->model) { case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; break; case ACPI_IORT_SMMU_V3_HISILICON_HI161X: smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; break; + case ACPI_IORT_SMMU_V3_GENERIC: + /* + * Tegra241 implementation stores its SMMU options and impl_dev + * in DSDT. Thus, go through the ACPI tables unconditionally. + */ + acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu); + break; } dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); + return 0; } static int arm_smmu_device_acpi_probe(struct platform_device *pdev, @@ -3712,12 +3813,10 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, /* Retrieve SMMUv3 specific data */ iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data; - acpi_smmu_get_options(iort_smmu->model, smmu); - if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) smmu->features |= ARM_SMMU_FEAT_COHERENCY; - return 0; + return acpi_smmu_iort_probe_model(node, smmu); } #else static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev, @@ -3734,6 +3833,9 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, u32 cells; int ret = -EINVAL; + if (!of_reserved_mem_device_init(dev)) + dev_info(dev, "using device-specific reserved memory\n"); + if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells)) dev_err(dev, "missing #iommu-cells property\n"); else if (cells != 1) @@ -3746,6 +3848,8 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, if (of_dma_is_coherent(dev->of_node)) smmu->features |= ARM_SMMU_FEAT_COHERENCY; + tegra_cmdqv_dt_probe(dev->of_node, smmu); + return ret; } @@ -3795,6 +3899,39 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); } +static void arm_smmu_impl_remove(void *data) +{ + struct arm_smmu_device *smmu = data; + + if (smmu->impl_ops && smmu->impl_ops->device_remove) + smmu->impl_ops->device_remove(smmu); +} + +/* + * Probe all the compiled in implementations. Each one checks to see if it + * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which + * replaces the callers. Otherwise the original is returned or ERR_PTR. + */ +static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu) +{ + struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV); + int ret; + + if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV)) + new_smmu = tegra241_cmdqv_probe(smmu); + + if (new_smmu == ERR_PTR(-ENODEV)) + return smmu; + if (IS_ERR(new_smmu)) + return new_smmu; + + ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove, + new_smmu); + if (ret) + return ERR_PTR(ret); + return new_smmu; +} + static int arm_smmu_device_probe(struct platform_device *pdev) { int irq, ret; @@ -3815,10 +3952,13 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (ret == -ENODEV) return ret; } - /* Set bypass mode according to firmware probing result */ smmu->bypass = !!ret; + smmu = arm_smmu_impl_probe(smmu); + if (IS_ERR(smmu)) + return PTR_ERR(smmu); + /* Base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) @@ -3880,15 +4020,17 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Check for RMRs and install bypass STEs if any */ arm_smmu_rmr_install_bypass_ste(smmu); - ret = arm_smmu_setup_irqs(smmu); - if (ret) - return ret; - /* Reset the device */ - ret = arm_smmu_device_reset(smmu); + ret = arm_smmu_device_reset(smmu, smmu->bypass); if (ret) goto err_disable; + ret = arm_smmu_setup_irqs(smmu); + if (ret) { + dev_err(smmu->dev, "failed to setup irqs\n"); + return ret; + } + /* And we're up. Go go go! */ ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, "smmu3.%pa", &ioaddr); @@ -3946,11 +4088,32 @@ static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) { struct arm_smmu_device *smmu = dev_get_drvdata(dev); - return arm_smmu_device_reset(smmu); + dev_dbg(dev, "Resuming\n"); + arm_smmu_device_reset(smmu, smmu->bypass); + + return 0; +} + +static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev) +{ + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + struct arm_smmu_cmdq_ent cmd; + + cmd.opcode = CMDQ_OP_CFGI_ALL; + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); + + cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); + + dev_dbg(dev, "Disabling\n"); + arm_smmu_device_disable(smmu); + + dev_dbg(dev, "Suspending\n"); + return 0; } static const struct dev_pm_ops arm_smmu_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(NULL, arm_smmu_runtime_resume) + SET_LATE_SYSTEM_SLEEP_PM_OPS(arm_smmu_runtime_suspend, arm_smmu_runtime_resume) }; static struct platform_driver arm_smmu_driver = { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index a9b8a76e5c18..b01adaba07e5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -14,6 +14,8 @@ #include #include +struct arm_smmu_device; + /* MMIO registers */ #define ARM_SMMU_IDR0 0x0 #define IDR0_ST_LVL GENMASK(28, 27) @@ -555,10 +557,18 @@ struct arm_smmu_cmdq { atomic_long_t *valid_map; atomic_t owner_prod; atomic_t lock; + bool (*supports_cmd)(struct arm_smmu_cmdq_ent *ent); }; +static inline bool arm_smmu_cmdq_supports_cmd(struct arm_smmu_cmdq *cmdq, + struct arm_smmu_cmdq_ent *ent) +{ + return cmdq->supports_cmd ? cmdq->supports_cmd(ent) : true; +} + struct arm_smmu_cmdq_batch { u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS]; + struct arm_smmu_cmdq *cmdq; int num; }; @@ -623,9 +633,20 @@ struct arm_smmu_strtab_cfg { u32 strtab_base_cfg; }; +struct arm_smmu_impl_ops { + int (*device_reset)(struct arm_smmu_device *smmu); + void (*device_remove)(struct arm_smmu_device *smmu); + int (*init_structures)(struct arm_smmu_device *smmu); + struct arm_smmu_cmdq *(*get_secondary_cmdq)( + struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent); +}; + /* An SMMUv3 instance */ struct arm_smmu_device { struct device *dev; + struct device *impl_dev; + const struct arm_smmu_impl_ops *impl_ops; + void __iomem *base; void __iomem *page1; @@ -655,6 +676,7 @@ struct arm_smmu_device { #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) #define ARM_SMMU_OPT_MSIPOLL (1 << 2) #define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3) +#define ARM_SMMU_OPT_TEGRA241_CMDQV (1 << 4) u32 options; struct arm_smmu_cmdq cmdq; @@ -686,7 +708,7 @@ struct arm_smmu_device { struct rb_root streams; struct mutex streams_mutex; - bool bypass; + bool bypass; }; struct arm_smmu_stream { @@ -760,6 +782,15 @@ bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd); int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, unsigned long iova, size_t size); +void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq); +int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, + struct arm_smmu_queue *q, void __iomem *page, + unsigned long prod_off, unsigned long cons_off, + size_t dwords, const char *name); +int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq); + #ifdef CONFIG_ARM_SMMU_V3_SVA bool arm_smmu_sva_supported(struct arm_smmu_device *smmu); bool arm_smmu_master_sva_supported(struct arm_smmu_master *master); @@ -815,4 +846,14 @@ static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, { } #endif /* CONFIG_ARM_SMMU_V3_SVA */ + +#ifdef CONFIG_TEGRA241_CMDQV +struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu); +#else /* CONFIG_TEGRA241_CMDQV */ +static inline struct arm_smmu_device * +tegra241_cmdqv_probe(struct arm_smmu_device *smmu) +{ + return ERR_PTR(-ENODEV); +} +#endif /* CONFIG_TEGRA241_CMDQV */ #endif /* _ARM_SMMU_V3_H */ diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c new file mode 100644 index 000000000000..388a9b891fd9 --- /dev/null +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -0,0 +1,941 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021-2024 NVIDIA CORPORATION & AFFILIATES. */ + +#define dev_fmt(fmt) "tegra241_cmdqv: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "arm-smmu-v3.h" + +/* CMDQV register page base and size defines */ +#define TEGRA241_CMDQV_CONFIG_BASE (0) +#define TEGRA241_CMDQV_CONFIG_SIZE (SZ_64K) +#define TEGRA241_VCMDQ_PAGE0_BASE (TEGRA241_CMDQV_CONFIG_BASE + SZ_64K) +#define TEGRA241_VCMDQ_PAGE1_BASE (TEGRA241_VCMDQ_PAGE0_BASE + SZ_64K) +#define TEGRA241_VINTF_PAGE_BASE (TEGRA241_VCMDQ_PAGE1_BASE + SZ_64K) + +/* CMDQV global base regs */ +#define TEGRA241_CMDQV_CONFIG 0x0000 +#define CMDQV_EN BIT(0) + +#define TEGRA241_CMDQV_PARAM 0x0004 +#define CMDQV_NUM_VINTF_LOG2 GENMASK(11, 8) +#define CMDQV_NUM_VCMDQ_LOG2 GENMASK(7, 4) + +#define TEGRA241_CMDQV_STATUS 0x0008 +#define CMDQV_ENABLED BIT(0) + +#define TEGRA241_CMDQV_VINTF_ERR_MAP 0x0014 +#define TEGRA241_CMDQV_VINTF_INT_MASK 0x001C +#define TEGRA241_CMDQV_CMDQ_ERR_MAP(m) (0x0024 + 0x4*(m)) + +#define TEGRA241_CMDQV_CMDQ_ALLOC(q) (0x0200 + 0x4*(q)) +#define CMDQV_CMDQ_ALLOC_VINTF GENMASK(20, 15) +#define CMDQV_CMDQ_ALLOC_LVCMDQ GENMASK(7, 1) +#define CMDQV_CMDQ_ALLOCATED BIT(0) + +/* VINTF base regs */ +#define TEGRA241_VINTF(v) (0x1000 + 0x100*(v)) + +#define TEGRA241_VINTF_CONFIG 0x0000 +#define VINTF_HYP_OWN BIT(17) +#define VINTF_VMID GENMASK(16, 1) +#define VINTF_EN BIT(0) + +#define TEGRA241_VINTF_STATUS 0x0004 +#define VINTF_STATUS GENMASK(3, 1) +#define VINTF_ENABLED BIT(0) + +#define TEGRA241_VINTF_LVCMDQ_ERR_MAP_64(m) \ + (0x00C0 + 0x8*(m)) +#define LVCMDQ_ERR_MAP_NUM_64 2 + +/* VCMDQ base regs */ +/* -- PAGE0 -- */ +#define TEGRA241_VCMDQ_PAGE0(q) (TEGRA241_VCMDQ_PAGE0_BASE + 0x80*(q)) + +#define TEGRA241_VCMDQ_CONS 0x00000 +#define VCMDQ_CONS_ERR GENMASK(30, 24) + +#define TEGRA241_VCMDQ_PROD 0x00004 + +#define TEGRA241_VCMDQ_CONFIG 0x00008 +#define VCMDQ_EN BIT(0) + +#define TEGRA241_VCMDQ_STATUS 0x0000C +#define VCMDQ_ENABLED BIT(0) + +#define TEGRA241_VCMDQ_GERROR 0x00010 +#define TEGRA241_VCMDQ_GERRORN 0x00014 + +/* -- PAGE1 -- */ +#define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q)) +#define VCMDQ_ADDR GENMASK(47, 5) +#define VCMDQ_LOG2SIZE GENMASK(4, 0) + +#define TEGRA241_VCMDQ_BASE 0x00000 +#define TEGRA241_VCMDQ_BASE_H 0x00004 +#define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008 +#define TEGRA241_VCMDQ_CONS_INDX_BASE_H 0x0000C + +/* VINTF logical-VCMDQ pages */ +#define TEGRA241_VINTFi_PAGE0(i) (TEGRA241_VINTF_PAGE_BASE + SZ_128K*(i)) +#define TEGRA241_VINTFi_PAGE1(i) (TEGRA241_VINTFi_PAGE0(i) + SZ_64K) +#define TEGRA241_VINTFi_LVCMDQ_PAGE0(i, q) \ + (TEGRA241_VINTFi_PAGE0(i) + 0x80*(q)) +#define TEGRA241_VINTFi_LVCMDQ_PAGE1(i, q) \ + (TEGRA241_VINTFi_PAGE1(i) + 0x80*(q)) + +/* MMIO helpers */ +#define REG_CMDQV(_cmdqv, _regname) \ + ((_cmdqv)->base + TEGRA241_CMDQV_##_regname) +#define REG_VINTF(_vintf, _regname) \ + ((_vintf)->base + TEGRA241_VINTF_##_regname) +#define REG_VCMDQ_PAGE0(_vcmdq, _regname) \ + ((_vcmdq)->page0 + TEGRA241_VCMDQ_##_regname) +#define REG_VCMDQ_PAGE1(_vcmdq, _regname) \ + ((_vcmdq)->page1 + TEGRA241_VCMDQ_##_regname) + + +static bool disable_cmdqv; +module_param(disable_cmdqv, bool, 0444); +MODULE_PARM_DESC(disable_cmdqv, + "This allows to disable CMDQV HW and use default SMMU internal CMDQ."); + +static bool bypass_vcmdq; +module_param(bypass_vcmdq, bool, 0444); +MODULE_PARM_DESC(bypass_vcmdq, + "This allows to bypass VCMDQ for debugging use or perf comparison."); + +/** + * struct tegra241_vcmdq - Virtual Command Queue + * @idx: Global index in the CMDQV + * @lidx: Local index in the VINTF + * @enabled: Enable status + * @cmdqv: Parent CMDQV pointer + * @vintf: Parent VINTF pointer + * @cmdq: Command Queue struct + * @page0: MMIO Page0 base address + * @page1: MMIO Page1 base address + */ +struct tegra241_vcmdq { + u16 idx; + u16 lidx; + + bool enabled; + + struct tegra241_cmdqv *cmdqv; + struct tegra241_vintf *vintf; + struct arm_smmu_cmdq cmdq; + + void __iomem *page0; + void __iomem *page1; +}; + +/** + * struct tegra241_vintf - Virtual Interface + * @idx: Global index in the CMDQV + * @enabled: Enable status + * @hyp_own: Owned by hypervisor (in-kernel) + * @cmdqv: Parent CMDQV pointer + * @lvcmdqs: List of logical VCMDQ pointers + * @base: MMIO base address + */ +struct tegra241_vintf { + u16 idx; + + bool enabled; + bool hyp_own; + + struct tegra241_cmdqv *cmdqv; + struct tegra241_vcmdq **lvcmdqs; + + void __iomem *base; +}; + +/** + * struct tegra241_cmdqv - CMDQ-V for SMMUv3 + * @smmu: SMMUv3 device + * @dev: CMDQV device + * @base: MMIO base address + * @irq: IRQ number + * @num_vintfs: Total number of VINTFs + * @num_vcmdqs: Total number of VCMDQs + * @num_lvcmdqs_per_vintf: Number of logical VCMDQs per VINTF + * @vintf_ids: VINTF id allocator + * @vintfs: List of VINTFs + */ +struct tegra241_cmdqv { + struct arm_smmu_device smmu; + struct device *dev; + + void __iomem *base; + int irq; + + /* CMDQV Hardware Params */ + u16 num_vintfs; + u16 num_vcmdqs; + u16 num_lvcmdqs_per_vintf; + + struct ida vintf_ids; + + struct tegra241_vintf **vintfs; +}; + +/* Config and Polling Helpers */ + +static inline int tegra241_cmdqv_write_config(struct tegra241_cmdqv *cmdqv, + void __iomem *addr_config, + void __iomem *addr_status, + u32 regval, const char *header, + bool *out_enabled) +{ + bool en = regval & BIT(0); + int ret; + + writel(regval, addr_config); + ret = readl_poll_timeout(addr_status, regval, + en ? regval & BIT(0) : !(regval & BIT(0)), + 1, ARM_SMMU_POLL_TIMEOUT_US); + if (ret) + dev_err(cmdqv->dev, "%sfailed to %sable, STATUS=0x%08X\n", + header, en ? "en" : "dis", regval); + if (out_enabled) + WRITE_ONCE(*out_enabled, regval & BIT(0)); + return ret; +} + +static inline int cmdqv_write_config(struct tegra241_cmdqv *cmdqv, u32 regval) +{ + return tegra241_cmdqv_write_config(cmdqv, + REG_CMDQV(cmdqv, CONFIG), + REG_CMDQV(cmdqv, STATUS), + regval, "CMDQV: ", NULL); +} + +static inline int vintf_write_config(struct tegra241_vintf *vintf, u32 regval) +{ + char header[16]; + + snprintf(header, 16, "VINTF%u: ", vintf->idx); + return tegra241_cmdqv_write_config(vintf->cmdqv, + REG_VINTF(vintf, CONFIG), + REG_VINTF(vintf, STATUS), + regval, header, &vintf->enabled); +} + +static inline char *lvcmdq_error_header(struct tegra241_vcmdq *vcmdq, + char *header, int hlen) +{ + WARN_ON(hlen < 64); + if (WARN_ON(!vcmdq->vintf)) + return ""; + snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ", + vcmdq->vintf->idx, vcmdq->idx, vcmdq->lidx); + return header; +} + +static inline int vcmdq_write_config(struct tegra241_vcmdq *vcmdq, u32 regval) +{ + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); + + return tegra241_cmdqv_write_config(vcmdq->cmdqv, + REG_VCMDQ_PAGE0(vcmdq, CONFIG), + REG_VCMDQ_PAGE0(vcmdq, STATUS), + regval, h, &vcmdq->enabled); +} + +/* ISR Functions */ + +static void tegra241_vintf0_handle_error(struct tegra241_vintf *vintf) +{ + int i; + + for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++) { + u64 map = readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i))); + + while (map) { + unsigned long lidx = __ffs64(map); + struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; + u32 gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)); + + __arm_smmu_cmdq_skip_err(&vintf->cmdqv->smmu, &vcmdq->cmdq); + writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN)); + map &= ~BIT_ULL(lidx); + } + } +} + +static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid) +{ + struct tegra241_cmdqv *cmdqv = (struct tegra241_cmdqv *)devid; + void __iomem *reg_vintf_map = REG_CMDQV(cmdqv, VINTF_ERR_MAP); + char err_str[256]; + u64 vintf_map; + + /* Use readl_relaxed() as register addresses are not 64-bit aligned */ + vintf_map = (u64)readl_relaxed(reg_vintf_map + 0x4) << 32 | + (u64)readl_relaxed(reg_vintf_map); + + snprintf(err_str, sizeof(err_str), + "vintf_map: %016llx, vcmdq_map %08x:%08x:%08x:%08x", vintf_map, + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(3))), + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(2))), + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(1))), + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(0)))); + + dev_warn(cmdqv->dev, "unexpected error reported. %s\n", err_str); + + /* Handle VINTF0 and its LVCMDQs */ + if (vintf_map & BIT_ULL(0)) { + tegra241_vintf0_handle_error(cmdqv->vintfs[0]); + vintf_map &= ~BIT_ULL(0); + } + + return IRQ_HANDLED; +} + +/* Command Queue Function */ + +static bool tegra241_guest_vcmdq_supports_cmd(struct arm_smmu_cmdq_ent *ent) +{ + switch (ent->opcode) { + case CMDQ_OP_TLBI_NH_ASID: + case CMDQ_OP_TLBI_NH_VA: + case CMDQ_OP_ATC_INV: + return true; + default: + return false; + } +} + +static struct arm_smmu_cmdq * +tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_ent *ent) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + struct tegra241_vintf *vintf = cmdqv->vintfs[0]; + struct tegra241_vcmdq *vcmdq; + u16 lidx; + + if (READ_ONCE(bypass_vcmdq)) + return NULL; + + /* Use SMMU CMDQ if VINTF0 is uninitialized */ + if (!READ_ONCE(vintf->enabled)) + return NULL; + + /* + * Select a LVCMDQ to use. Here we use a temporal solution to + * balance out traffic on cmdq issuing: each cmdq has its own + * lock, if all cpus issue cmdlist using the same cmdq, only + * one CPU at a time can enter the process, while the others + * will be spinning at the same lock. + */ + lidx = raw_smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf; + vcmdq = vintf->lvcmdqs[lidx]; + if (!vcmdq || !READ_ONCE(vcmdq->enabled)) + return NULL; + + /* Unsupported CMD goes for smmu->cmdq pathway */ + if (!arm_smmu_cmdq_supports_cmd(&vcmdq->cmdq, ent)) + return NULL; + return &vcmdq->cmdq; +} + +/* HW Reset Functions */ + +static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) +{ + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); + u32 gerrorn, gerror; + + if (vcmdq_write_config(vcmdq, 0)) { + dev_err(vcmdq->cmdqv->dev, + "%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h, + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS))); + } + writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, PROD)); + writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, CONS)); + writel_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE_H)); + writel_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE)); + writel_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, CONS_INDX_BASE_H)); + writel_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, CONS_INDX_BASE)); + + gerrorn = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)); + gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)); + if (gerror != gerrorn) { + dev_warn(vcmdq->cmdqv->dev, + "%suncleared error detected, resetting\n", h); + writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN)); + } + + dev_dbg(vcmdq->cmdqv->dev, "%sdeinited\n", h); +} + +static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq) +{ + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); + int ret; + + /* Reset VCMDQ */ + tegra241_vcmdq_hw_deinit(vcmdq); + + /* Configure and enable VCMDQ */ + writel_relaxed(upper_32_bits(vcmdq->cmdq.q.q_base), REG_VCMDQ_PAGE1(vcmdq, BASE_H)); + writel_relaxed(lower_32_bits(vcmdq->cmdq.q.q_base), REG_VCMDQ_PAGE1(vcmdq, BASE)); + writel_relaxed(vcmdq->cmdq.q.llq.prod, REG_VCMDQ_PAGE0(vcmdq, PROD)); + writel_relaxed(vcmdq->cmdq.q.llq.cons, REG_VCMDQ_PAGE0(vcmdq, CONS)); + + ret = vcmdq_write_config(vcmdq, VCMDQ_EN); + if (ret) { + dev_err(vcmdq->cmdqv->dev, + "%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h, + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS))); + return ret; + } + + dev_dbg(vcmdq->cmdqv->dev, "%sinited\n", h); + return 0; +} + +static void tegra241_vintf_hw_deinit(struct tegra241_vintf *vintf) +{ + u16 lidx; + + for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) + if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) + tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]); + vintf_write_config(vintf, 0); +} + +static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own) +{ + u32 regval; + u16 lidx; + int ret; + + /* Reset VINTF */ + tegra241_vintf_hw_deinit(vintf); + + /* Configure and enable VINTF */ + /* + * Note that HYP_OWN bit is wired to zero when running in guest kernel, + * whether enabling it here or not, as !HYP_OWN cmdq HWs only support a + * restricted set of supported commands. + */ + regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own); + writel(regval, REG_VINTF(vintf, CONFIG)); + + ret = vintf_write_config(vintf, regval | VINTF_EN); + if (ret) + return ret; + /* + * As being mentioned above, HYP_OWN bit is wired to zero for a guest + * kernel, so read it back from HW to ensure that reflects in hyp_own + */ + vintf->hyp_own = !!(VINTF_HYP_OWN & readl(REG_VINTF(vintf, CONFIG))); + + for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) { + if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) { + ret = tegra241_vcmdq_hw_init(vintf->lvcmdqs[lidx]); + if (ret) { + tegra241_vintf_hw_deinit(vintf); + return ret; + } + } + } + + return 0; +} + +static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + u16 qidx, lidx, idx; + u32 regval; + int ret; + + /* Reset CMDQV */ + regval = readl_relaxed(REG_CMDQV(cmdqv, CONFIG)); + ret = cmdqv_write_config(cmdqv, regval & ~CMDQV_EN); + if (ret) + return ret; + ret = cmdqv_write_config(cmdqv, regval | CMDQV_EN); + if (ret) + return ret; + + /* Assign preallocated global VCMDQs to each VINTF as LVCMDQs */ + for (idx = 0, qidx = 0; idx < cmdqv->num_vintfs; idx++) { + for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { + regval = FIELD_PREP(CMDQV_CMDQ_ALLOC_VINTF, idx); + regval |= FIELD_PREP(CMDQV_CMDQ_ALLOC_LVCMDQ, lidx); + regval |= CMDQV_CMDQ_ALLOCATED; + writel_relaxed(regval, + REG_CMDQV(cmdqv, CMDQ_ALLOC(qidx++))); + } + } + + return tegra241_vintf_hw_init(cmdqv->vintfs[0], true); +} + +/* VCMDQ Resource Helpers */ + +static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) +{ + struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu; + struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq; + struct arm_smmu_queue *q = &cmdq->q; + char name[16]; + u32 regval; + int ret; + + snprintf(name, 16, "vcmdq%u", vcmdq->idx); + + /* Cap queue size to SMMU's IDR1.CMDQS and ensure natural alignment */ + regval = readl_relaxed(smmu->base + ARM_SMMU_IDR1); + q->llq.max_n_shift = + min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, regval)); + + /* Use the common helper to init the VCMDQ, and then... */ + ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0, + TEGRA241_VCMDQ_PROD, TEGRA241_VCMDQ_CONS, + CMDQ_ENT_DWORDS, name); + if (ret) + return ret; + + /* ...override q_base to write VCMDQ_BASE registers */ + q->q_base = q->base_dma & VCMDQ_ADDR; + q->q_base |= FIELD_PREP(VCMDQ_LOG2SIZE, q->llq.max_n_shift); + + if (!vcmdq->vintf->hyp_own) + cmdq->supports_cmd = tegra241_guest_vcmdq_supports_cmd; + + return arm_smmu_cmdq_init(smmu, cmdq); +} + +/* VINTF Logical VCMDQ Resource Helpers */ + +static void tegra241_vintf_deinit_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + vintf->lvcmdqs[lidx] = NULL; +} + +static int tegra241_vintf_init_lvcmdq(struct tegra241_vintf *vintf, u16 lidx, + struct tegra241_vcmdq *vcmdq) +{ + struct tegra241_cmdqv *cmdqv = vintf->cmdqv; + u16 idx = vintf->idx; + + vcmdq->idx = idx * cmdqv->num_lvcmdqs_per_vintf + lidx; + vcmdq->lidx = lidx; + vcmdq->cmdqv = cmdqv; + vcmdq->vintf = vintf; + vcmdq->page0 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE0(idx, lidx); + vcmdq->page1 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE1(idx, lidx); + + vintf->lvcmdqs[lidx] = vcmdq; + return 0; +} + +static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; + char header[64]; + + /* Note that the lvcmdq queue memory space is managed by devres */ + + tegra241_vintf_deinit_lvcmdq(vintf, lidx); + + dev_dbg(vintf->cmdqv->dev, + "%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 64)); + kfree(vcmdq); +} + +static struct tegra241_vcmdq * +tegra241_vintf_alloc_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + struct tegra241_cmdqv *cmdqv = vintf->cmdqv; + struct tegra241_vcmdq *vcmdq; + char header[64]; + int ret; + + vcmdq = kzalloc(sizeof(*vcmdq), GFP_KERNEL); + if (!vcmdq) + return ERR_PTR(-ENOMEM); + + ret = tegra241_vintf_init_lvcmdq(vintf, lidx, vcmdq); + if (ret) + goto free_vcmdq; + + /* Build an arm_smmu_cmdq for each LVCMDQ */ + ret = tegra241_vcmdq_alloc_smmu_cmdq(vcmdq); + if (ret) + goto deinit_lvcmdq; + + dev_dbg(cmdqv->dev, + "%sallocated\n", lvcmdq_error_header(vcmdq, header, 64)); + return vcmdq; + +deinit_lvcmdq: + tegra241_vintf_deinit_lvcmdq(vintf, lidx); +free_vcmdq: + kfree(vcmdq); + return ERR_PTR(ret); +} + +/* VINTF Resource Helpers */ + +static void tegra241_cmdqv_deinit_vintf(struct tegra241_cmdqv *cmdqv, u16 idx) +{ + kfree(cmdqv->vintfs[idx]->lvcmdqs); + ida_free(&cmdqv->vintf_ids, idx); + cmdqv->vintfs[idx] = NULL; +} + +static int tegra241_cmdqv_init_vintf(struct tegra241_cmdqv *cmdqv, u16 max_idx, + struct tegra241_vintf *vintf) +{ + + u16 idx; + int ret; + + ret = ida_alloc_max(&cmdqv->vintf_ids, max_idx, GFP_KERNEL); + if (ret < 0) + return ret; + idx = ret; + + vintf->idx = idx; + vintf->cmdqv = cmdqv; + vintf->base = cmdqv->base + TEGRA241_VINTF(idx); + + vintf->lvcmdqs = kcalloc(cmdqv->num_lvcmdqs_per_vintf, + sizeof(*vintf->lvcmdqs), GFP_KERNEL); + if (!vintf->lvcmdqs) { + ida_free(&cmdqv->vintf_ids, idx); + return -ENOMEM; + } + + cmdqv->vintfs[idx] = vintf; + return ret; +} + +/* Remove Helpers */ + +static void tegra241_vintf_remove_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]); + tegra241_vintf_free_lvcmdq(vintf, lidx); +} + +static void tegra241_cmdqv_remove_vintf(struct tegra241_cmdqv *cmdqv, u16 idx) +{ + struct tegra241_vintf *vintf = cmdqv->vintfs[idx]; + u16 lidx; + + /* Remove LVCMDQ resources */ + for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) + if (vintf->lvcmdqs[lidx]) + tegra241_vintf_remove_lvcmdq(vintf, lidx); + + /* Remove VINTF resources */ + tegra241_vintf_hw_deinit(vintf); + + dev_dbg(cmdqv->dev, "VINTF%u: deallocated\n", vintf->idx); + tegra241_cmdqv_deinit_vintf(cmdqv, idx); + kfree(vintf); +} + +static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + u16 idx; + + /* Remove VINTF resources */ + for (idx = 0; idx < cmdqv->num_vintfs; idx++) { + if (cmdqv->vintfs[idx]) { + /* Only vintf0 should remain at this stage */ + WARN_ON(idx > 0); + tegra241_cmdqv_remove_vintf(cmdqv, idx); + } + } + + /* Remove cmdqv resources */ + ida_destroy(&cmdqv->vintf_ids); + + if (cmdqv->irq > 0) + free_irq(cmdqv->irq, cmdqv); + iounmap(cmdqv->base); + kfree(cmdqv->vintfs); + put_device(cmdqv->dev); /* smmu->impl_dev */ +} + +static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = { + .get_secondary_cmdq = tegra241_cmdqv_get_cmdq, + .device_reset = tegra241_cmdqv_hw_reset, + .device_remove = tegra241_cmdqv_remove, +}; + +/* Probe Functions */ + +static int tegra241_cmdqv_acpi_is_memory(struct acpi_resource *res, void *data) +{ + struct resource_win win; + + return !acpi_dev_resource_address_space(res, &win); +} + +static int tegra241_cmdqv_acpi_get_irqs(struct acpi_resource *ares, void *data) +{ + struct resource r; + int *irq = data; + + if (*irq <= 0 && acpi_dev_resource_interrupt(ares, 0, &r)) + *irq = r.start; + return 1; /* No need to add resource to the list */ +} + +static struct resource * +tegra241_cmdqv_find_acpi_resource(struct device *dev, int *irq) +{ + struct acpi_device *adev = to_acpi_device(dev); + struct list_head resource_list; + struct resource_entry *rentry; + struct resource *res = NULL; + int ret; + + INIT_LIST_HEAD(&resource_list); + ret = acpi_dev_get_resources(adev, &resource_list, + tegra241_cmdqv_acpi_is_memory, NULL); + if (ret < 0) { + dev_err(dev, "failed to get memory resource: %d\n", ret); + return NULL; + } + + rentry = list_first_entry_or_null(&resource_list, + struct resource_entry, node); + if (!rentry) { + dev_err(dev, "failed to get memory resource entry\n"); + goto free_list; + } + + /* Caller must free the res */ + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + goto free_list; + + *res = *rentry->res; + + acpi_dev_free_resource_list(&resource_list); + + INIT_LIST_HEAD(&resource_list); + + if (irq) + ret = acpi_dev_get_resources(adev, &resource_list, + tegra241_cmdqv_acpi_get_irqs, irq); + if (ret < 0 || !irq || *irq <= 0) + dev_warn(dev, "no interrupt. errors will not be reported\n"); + +free_list: + acpi_dev_free_resource_list(&resource_list); + return res; +} + +static struct resource * +tegra241_cmdqv_find_dt_resource(struct device *dev, int *irq) +{ + struct platform_device *pdev = to_platform_device(dev); + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "no memory resource found for CMDQV\n"); + return NULL; + } + + if (irq) + *irq = platform_get_irq_byname_optional(pdev, "cmdqv"); + if (!irq || *irq <= 0) + dev_warn(dev, "no interrupt. errors will not be reported\n"); + + return res; +} + +static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + struct tegra241_vintf *vintf; + int lidx; + int ret; + + vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); + if (!vintf) + return -ENOMEM; + + /* Init VINTF0 for in-kernel use */ + ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); + if (ret) { + dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); + return ret; + } + + /* Preallocate logical VCMDQs to VINTF0 */ + for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { + struct tegra241_vcmdq *vcmdq; + + vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); + if (IS_ERR(vcmdq)) + return PTR_ERR(vcmdq); + } + + /* Now, we are ready to run all the impl ops */ + smmu->impl_ops = &tegra241_cmdqv_impl_ops; + return 0; +} + +#ifdef CONFIG_IOMMU_DEBUGFS +static struct dentry *cmdqv_debugfs_dir; +#endif + +static struct arm_smmu_device * +__tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, + int irq) +{ + static const struct arm_smmu_impl_ops init_ops = { + .init_structures = tegra241_cmdqv_init_structures, + .device_remove = tegra241_cmdqv_remove, + }; + struct tegra241_cmdqv *cmdqv = NULL; + struct arm_smmu_device *new_smmu; + void __iomem *base; + u32 regval; + int ret; + + static_assert(offsetof(struct tegra241_cmdqv, smmu) == 0); + + base = ioremap(res->start, resource_size(res)); + if (!base) { + dev_err(smmu->dev, "failed to ioremap\n"); + return NULL; + } + + regval = readl(base + TEGRA241_CMDQV_CONFIG); + if (disable_cmdqv) { + dev_info(smmu->dev, "Detected disable_cmdqv=true\n"); + writel(regval & ~CMDQV_EN, base + TEGRA241_CMDQV_CONFIG); + goto iounmap; + } + + cmdqv = devm_krealloc(smmu->dev, smmu, sizeof(*cmdqv), GFP_KERNEL); + if (!cmdqv) + goto iounmap; + new_smmu = &cmdqv->smmu; + + cmdqv->irq = irq; + cmdqv->base = base; + cmdqv->dev = smmu->impl_dev; + + if (cmdqv->irq > 0) { + ret = request_irq(irq, tegra241_cmdqv_isr, 0, "tegra241-cmdqv", + cmdqv); + if (ret) { + dev_err(cmdqv->dev, "failed to request irq (%d): %d\n", + cmdqv->irq, ret); + goto iounmap; + } + } + + regval = readl_relaxed(REG_CMDQV(cmdqv, PARAM)); + cmdqv->num_vintfs = 1 << FIELD_GET(CMDQV_NUM_VINTF_LOG2, regval); + cmdqv->num_vcmdqs = 1 << FIELD_GET(CMDQV_NUM_VCMDQ_LOG2, regval); + cmdqv->num_lvcmdqs_per_vintf = cmdqv->num_vcmdqs / cmdqv->num_vintfs; + + cmdqv->vintfs = + kcalloc(cmdqv->num_vintfs, sizeof(*cmdqv->vintfs), GFP_KERNEL); + if (!cmdqv->vintfs) + goto free_irq; + + ida_init(&cmdqv->vintf_ids); + +#ifdef CONFIG_IOMMU_DEBUGFS + if (!cmdqv_debugfs_dir) { + cmdqv_debugfs_dir = + debugfs_create_dir("tegra241_cmdqv", iommu_debugfs_dir); + debugfs_create_bool("bypass_vcmdq", 0644, cmdqv_debugfs_dir, + &bypass_vcmdq); + } +#endif + + /* Provide init-level ops only, until tegra241_cmdqv_init_structures */ + new_smmu->impl_ops = &init_ops; + + return new_smmu; + +free_irq: + if (cmdqv->irq > 0) + free_irq(cmdqv->irq, cmdqv); +iounmap: + iounmap(base); + return NULL; +} + +struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu) +{ + struct arm_smmu_device *new_smmu; + struct resource *res = NULL; + int irq; + + if (!smmu->dev->of_node) + res = tegra241_cmdqv_find_acpi_resource(smmu->impl_dev, &irq); + else + res = tegra241_cmdqv_find_dt_resource(smmu->impl_dev, &irq); + if (!res) + goto out_fallback; + + new_smmu = __tegra241_cmdqv_probe(smmu, res, irq); + if (!smmu->dev->of_node) + kfree(res); + + if (new_smmu) + return new_smmu; + +out_fallback: + dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n"); + smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV; + put_device(smmu->impl_dev); + return ERR_PTR(-ENODEV); +} + +static const struct of_device_id tegra241_cmdqv_of_match[] = { + { .compatible = "nvidia,tegra264-cmdqv" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, tegra241_cmdqv_of_match); + +static struct platform_driver tegra241_cmdqv_driver = { + .driver = { + .name = "tegra241-cmdqv", + .of_match_table = tegra241_cmdqv_of_match, + }, +}; +module_platform_driver(tegra241_cmdqv_driver); + +MODULE_DESCRIPTION("NVIDIA Tegra241 Command Queue Virtualization Driver"); +MODULE_LICENSE("GPL v2");