From 7a77903641bb72689c9a4379b58c1c8323310057 Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Wed, 2 Jul 2025 09:04:32 +0000 Subject: [PATCH 01/28] iommu/arm-smmu-v3: Issue a batch of commands to the same cmdq The driver calls in different places the arm_smmu_get_cmdq() helper, and it's fine to do so since the helper always returns the single SMMU CMDQ. However, with NVIDIA CMDQV extension or SMMU ECMDQ, there can be multiple cmdqs in the system to select one from. And either case requires a batch of commands to be issued to the same cmdq. Thus, a cmdq has to be decided in the higher-level callers. Add a cmdq pointer in arm_smmu_cmdq_batch structure, and decide the cmdq when initializing the batch. Pass its pointer down to the bottom function. Update __arm_smmu_cmdq_issue_cmd() accordingly for single command issuers. Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Change-Id: I22baeeb40b0e1d58b63c85246c7033397be7811b Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/2cbf5ddefb6ea611e48d67c642271bd24421eb21.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435428 Reviewed-by: Pritesh Raithatha GVS: buildbot_gerritrpt --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 49 +++++++++++++-------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 + 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index b31fff4f4741..5d12eb02ff2c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -591,11 +591,11 @@ static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq, /* Wait for the command queue to become non-full */ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { unsigned long flags; struct arm_smmu_queue_poll qp; - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); int ret = 0; /* @@ -626,11 +626,11 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu, * Must be called with the cmdq lock held in some capacity. */ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { int ret = 0; struct arm_smmu_queue_poll qp; - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod)); queue_poll_init(smmu, &qp); @@ -650,10 +650,10 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu, * Must be called with the cmdq lock held in some capacity. */ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { struct arm_smmu_queue_poll qp; - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); u32 prod = llq->prod; int ret = 0; @@ -700,12 +700,13 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu, } static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { if (smmu->options & ARM_SMMU_OPT_MSIPOLL) - return __arm_smmu_cmdq_poll_until_msi(smmu, llq); + return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); - return __arm_smmu_cmdq_poll_until_consumed(smmu, llq); + return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq); } static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds, @@ -742,13 +743,13 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds, * CPU will appear before any of the commands from the other CPU. */ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, u64 *cmds, int n, bool sync) { u64 cmd_sync[CMDQ_ENT_DWORDS]; u32 prod; unsigned long flags; bool owner; - struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); struct arm_smmu_ll_queue llq, head; int ret = 0; @@ -762,7 +763,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, while (!queue_has_space(&llq, n + sync)) { local_irq_restore(flags); - if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq)) + if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq)) dev_err_ratelimited(smmu->dev, "CMDQ timeout\n"); local_irq_save(flags); } @@ -838,7 +839,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */ if (sync) { llq.prod = queue_inc_prod_n(&llq, n); - ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq); + ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq); if (ret) { dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n", @@ -873,7 +874,8 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, return -EINVAL; } - return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync); + return arm_smmu_cmdq_issue_cmdlist( + smmu, arm_smmu_get_cmdq(smmu), cmd, 1, sync); } static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, @@ -888,6 +890,13 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu, return __arm_smmu_cmdq_issue_cmd(smmu, ent, true); } +static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_batch *cmds) +{ + cmds->num = 0; + cmds->cmdq = arm_smmu_get_cmdq(smmu); +} + static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_batch *cmds, struct arm_smmu_cmdq_ent *cmd) @@ -896,13 +905,15 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, if (cmds->num == CMDQ_BATCH_ENTRIES - 1 && (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) { - arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); - cmds->num = 0; + arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, + cmds->num, true); + arm_smmu_cmdq_batch_init(smmu, cmds); } if (cmds->num == CMDQ_BATCH_ENTRIES) { - arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false); - cmds->num = 0; + arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, + cmds->num, false); + arm_smmu_cmdq_batch_init(smmu, cmds); } index = cmds->num * CMDQ_ENT_DWORDS; @@ -918,7 +929,9 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_batch *cmds) { - return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true); + return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, + cmds->num, true); + } static int arm_smmu_page_response(struct device *dev, @@ -985,7 +998,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master, }, }; - cmds.num = 0; + arm_smmu_cmdq_batch_init(smmu, &cmds); for (i = 0; i < master->num_streams; i++) { cmd.cfgi.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); @@ -1786,7 +1799,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); - cmds.num = 0; + arm_smmu_cmdq_batch_init(master->smmu, &cmds); for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd); @@ -1826,7 +1839,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); - cmds.num = 0; + arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds); spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_for_each_entry(master, &smmu_domain->devices, domain_head) { @@ -1903,7 +1916,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, num_pages++; } - cmds.num = 0; + arm_smmu_cmdq_batch_init(smmu, &cmds); while (iova < end) { if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index a9b8a76e5c18..f05b76ff93ad 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -559,6 +559,7 @@ struct arm_smmu_cmdq { struct arm_smmu_cmdq_batch { u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS]; + struct arm_smmu_cmdq *cmdq; int num; }; From d6b2a909b9a97503b3d0b139712cd53130ea1516 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Aug 2024 15:34:31 -0700 Subject: [PATCH 02/28] iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_build_sync_cmd The CMDQV extension on NVIDIA Tegra241 SoC only supports CS_NONE in the CS field of CMD_SYNC, v.s. standard SMMU CMDQ. Pass in the cmdq pointer directly, so the function can identify a different cmdq implementation. Reviewed-by: Jason Gunthorpe Change-Id: I3f29960566b7fb75a76046b70ef55594ffbd0e56 Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/723288287997b6dfbcd2a904d2c11e9b23f82250.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435429 GVS: buildbot_gerritrpt Reviewed-by: Pritesh Raithatha --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5d12eb02ff2c..fb4cf6115044 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -351,8 +351,9 @@ static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) } static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, - struct arm_smmu_queue *q, u32 prod) + struct arm_smmu_cmdq *cmdq, u32 prod) { + struct arm_smmu_queue *q = &cmdq->q; struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC, }; @@ -370,7 +371,7 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, } static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, - struct arm_smmu_queue *q) + struct arm_smmu_cmdq *cmdq) { static const char * const cerror_str[] = { [CMDQ_ERR_CERROR_NONE_IDX] = "No error", @@ -378,6 +379,7 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, [CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch", [CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout", }; + struct arm_smmu_queue *q = &cmdq->q; int i; u64 cmd[CMDQ_ENT_DWORDS]; @@ -426,7 +428,7 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) { - __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q); + __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq); } /* @@ -789,7 +791,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n); if (sync) { prod = queue_inc_prod_n(&llq, n); - arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod); + arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod); queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS); /* From af82bcacd5dcd51ebf418a008ed9298512689bc8 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Aug 2024 15:34:32 -0700 Subject: [PATCH 03/28] iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_init So that this function can be used by other cmdqs than &smmu->cmdq only. Reviewed-by: Jason Gunthorpe Change-Id: I7eb1698fe46941ed326f0c0a11ad28d6ecddddd8 Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/e11a3c0bde172c9652c2946f12bc2ceed4c3a355.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435430 Reviewed-by: Pritesh Raithatha GVS: buildbot_gerritrpt --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index fb4cf6115044..a19f3beb0f60 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2930,9 +2930,9 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, return 0; } -static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu) +static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq) { - struct arm_smmu_cmdq *cmdq = &smmu->cmdq; unsigned int nents = 1 << cmdq->q.llq.max_n_shift; atomic_set(&cmdq->owner_prod, 0); @@ -2957,7 +2957,7 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu) if (ret) return ret; - ret = arm_smmu_cmdq_init(smmu); + ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq); if (ret) return ret; From bf155e5de12cd8c9664d33f34237d983806c1e74 Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Wed, 2 Jul 2025 09:11:13 +0000 Subject: [PATCH 04/28] iommu/arm-smmu-v3: Make symbols public for CONFIG_TEGRA241_CMDQV The symbols __arm_smmu_cmdq_skip_err(), arm_smmu_init_one_queue(), and arm_smmu_cmdq_init() need to be used by the tegra241-cmdqv compilation unit in a following patch. Remove the static and put prototypes in the header. Reviewed-by: Jason Gunthorpe Change-Id: I7b66675548ca8f3ed475d4fdd021d473feac07f2 Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/c4f2aa5f5f40a2e7c68b132c6d3171d6403de57a.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435431 GVS: buildbot_gerritrpt Reviewed-by: Pritesh Raithatha --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 18 ++++++++---------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 9 +++++++++ 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index a19f3beb0f60..8832db217292 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -370,8 +370,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, arm_smmu_cmdq_build_cmd(cmd, &ent); } -static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, - struct arm_smmu_cmdq *cmdq) +void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq) { static const char * const cerror_str[] = { [CMDQ_ERR_CERROR_NONE_IDX] = "No error", @@ -2887,12 +2887,10 @@ static struct iommu_ops arm_smmu_ops = { }; /* Probing and initialisation functions */ -static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, - struct arm_smmu_queue *q, - void __iomem *page, - unsigned long prod_off, - unsigned long cons_off, - size_t dwords, const char *name) +int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, + struct arm_smmu_queue *q, void __iomem *page, + unsigned long prod_off, unsigned long cons_off, + size_t dwords, const char *name) { size_t qsz; @@ -2930,8 +2928,8 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, return 0; } -static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, - struct arm_smmu_cmdq *cmdq) +int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq) { unsigned int nents = 1 << cmdq->q.llq.max_n_shift; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index f05b76ff93ad..e11b5d250f7c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -761,6 +761,15 @@ bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd); int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, unsigned long iova, size_t size); +void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq); +int arm_smmu_init_one_queue(struct arm_smmu_device *smmu, + struct arm_smmu_queue *q, void __iomem *page, + unsigned long prod_off, unsigned long cons_off, + size_t dwords, const char *name); +int arm_smmu_cmdq_init(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq); + #ifdef CONFIG_ARM_SMMU_V3_SVA bool arm_smmu_sva_supported(struct arm_smmu_device *smmu); bool arm_smmu_master_sva_supported(struct arm_smmu_master *master); From 5903705a336089dcefec1789b3c8c1b9c0f8fa72 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Aug 2024 15:34:34 -0700 Subject: [PATCH 05/28] iommu/arm-smmu-v3: Add ARM_SMMU_OPT_TEGRA241_CMDQV The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the CS field of CMD_SYNC. Add a new SMMU option to accommodate that. Suggested-by: Will Deacon Reviewed-by: Jason Gunthorpe Change-Id: I8767faf8b780474c93c78305e89bd51308f45369 Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/a3cb9bb2429fbae4a59f7ef517614d226763d717.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435432 GVS: buildbot_gerritrpt Reviewed-by: Pritesh Raithatha --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 16 +++++++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8832db217292..fdc56eeb1463 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -350,6 +350,15 @@ static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) return &smmu->cmdq; } +static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq) +{ + if (cmdq == &smmu->cmdq) + return false; + + return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV; +} + static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, struct arm_smmu_cmdq *cmdq, u32 prod) { @@ -368,6 +377,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, } arm_smmu_cmdq_build_cmd(cmd, &ent); + if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) + u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS); } void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, @@ -422,6 +433,8 @@ void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, /* Convert the erroneous command into a CMD_SYNC */ arm_smmu_cmdq_build_cmd(cmd, &cmd_sync); + if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) + u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS); queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); } @@ -705,7 +718,8 @@ static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu, struct arm_smmu_cmdq *cmdq, struct arm_smmu_ll_queue *llq) { - if (smmu->options & ARM_SMMU_OPT_MSIPOLL) + if (smmu->options & ARM_SMMU_OPT_MSIPOLL && + !arm_smmu_cmdq_needs_busy_polling(smmu, cmdq)) return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq); return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index e11b5d250f7c..252a5e34db79 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -656,6 +656,7 @@ struct arm_smmu_device { #define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1) #define ARM_SMMU_OPT_MSIPOLL (1 << 2) #define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3) +#define ARM_SMMU_OPT_TEGRA241_CMDQV (1 << 4) u32 options; struct arm_smmu_cmdq cmdq; From 3f925f4d6b61df487939d2ece229822d63e9e6c9 Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Wed, 2 Jul 2025 09:15:30 +0000 Subject: [PATCH 06/28] iommu/arm-smmu-v3: Add acpi_smmu_iort_probe_model for impl For model-specific implementation, repurpose the acpi_smmu_get_options() to a wider acpi_smmu_acpi_probe_model(). A new model can add to the list in this new function. Suggested-by: Will Deacon Change-Id: I6a9912f55cde32a78da1203f1abffc50e59c1438 Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/79716299829aeab2e55b8c7932f2634b209bb4d5.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435433 Reviewed-by: Pritesh Raithatha GVS: buildbot_gerritrpt --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index fdc56eeb1463..be7214b16950 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3713,18 +3713,25 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) } #ifdef CONFIG_ACPI -static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu) +static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node, + struct arm_smmu_device *smmu) { - switch (model) { + struct acpi_iort_smmu_v3 *iort_smmu = + (struct acpi_iort_smmu_v3 *)node->node_data; + + switch (iort_smmu->model) { case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX: smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY; break; case ACPI_IORT_SMMU_V3_HISILICON_HI161X: smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; break; + case ACPI_IORT_SMMU_V3_GENERIC: + break; } dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options); + return 0; } static int arm_smmu_device_acpi_probe(struct platform_device *pdev, @@ -3739,12 +3746,10 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev, /* Retrieve SMMUv3 specific data */ iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data; - acpi_smmu_get_options(iort_smmu->model, smmu); - if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE) smmu->features |= ARM_SMMU_FEAT_COHERENCY; - return 0; + return acpi_smmu_iort_probe_model(node, smmu); } #else static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev, From 8eac48a1ba52fa8a376535221cc123baf7f28d28 Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Wed, 2 Jul 2025 09:20:30 +0000 Subject: [PATCH 07/28] iommu/arm-smmu-v3: Add struct arm_smmu_impl_ops Mimicing the arm-smmu (v2) driver, introduce a struct arm_smmu_impl_ops to accommodate impl routines. Suggested-by: Will Deacon Change-Id: I9d5d5e1cf32dcd6c282dc217903d6c424d1417cb Signed-off-by: Jason Gunthorpe Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/8fe9f3805568aabf771fc6706c116459016bf62d.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435434 GVS: buildbot_gerritrpt Reviewed-by: Pritesh Raithatha --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 52 ++++++++++++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 10 ++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index be7214b16950..80369af5b9d4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -347,7 +347,12 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) { - return &smmu->cmdq; + struct arm_smmu_cmdq *cmdq = NULL; + + if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq) + cmdq = smmu->impl_ops->get_secondary_cmdq(smmu); + + return cmdq ?: &smmu->cmdq; } static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu, @@ -3827,6 +3832,38 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu) iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list); } +static void arm_smmu_impl_remove(void *data) +{ + struct arm_smmu_device *smmu = data; + + if (smmu->impl_ops && smmu->impl_ops->device_remove) + smmu->impl_ops->device_remove(smmu); +} + +/* + * Probe all the compiled in implementations. Each one checks to see if it + * matches this HW and if so returns a devm_krealloc'd arm_smmu_device which + * replaces the callers. Otherwise the original is returned or ERR_PTR. + */ +static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu) +{ + struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV); + int ret; + + /* Add impl probe */ + + if (new_smmu == ERR_PTR(-ENODEV)) + return smmu; + if (IS_ERR(new_smmu)) + return new_smmu; + + ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove, + new_smmu); + if (ret) + return ERR_PTR(ret); + return new_smmu; +} + static int arm_smmu_device_probe(struct platform_device *pdev) { int irq, ret; @@ -3847,10 +3884,13 @@ static int arm_smmu_device_probe(struct platform_device *pdev) if (ret == -ENODEV) return ret; } - /* Set bypass mode according to firmware probing result */ smmu->bypass = !!ret; + smmu = arm_smmu_impl_probe(smmu); + if (IS_ERR(smmu)) + return PTR_ERR(smmu); + /* Base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) @@ -3933,6 +3973,14 @@ static int arm_smmu_device_probe(struct platform_device *pdev) goto err_free_sysfs; } + if (smmu->impl_ops && smmu->impl_ops->device_reset) { + ret = smmu->impl_ops->device_reset(smmu); + if (ret) { + dev_err(smmu->dev, "failed to reset impl\n"); + return ret; + } + } + return 0; err_free_sysfs: diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 252a5e34db79..7693c7e95db5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -14,6 +14,8 @@ #include #include +struct arm_smmu_device; + /* MMIO registers */ #define ARM_SMMU_IDR0 0x0 #define IDR0_ST_LVL GENMASK(28, 27) @@ -624,9 +626,17 @@ struct arm_smmu_strtab_cfg { u32 strtab_base_cfg; }; +struct arm_smmu_impl_ops { + int (*device_reset)(struct arm_smmu_device *smmu); + void (*device_remove)(struct arm_smmu_device *smmu); + struct arm_smmu_cmdq *(*get_secondary_cmdq)(struct arm_smmu_device *smmu); +}; + /* An SMMUv3 instance */ struct arm_smmu_device { struct device *dev; + const struct arm_smmu_impl_ops *impl_ops; + void __iomem *base; void __iomem *page1; From 2e2b4c777ba178669873ce13f797b157f4b9e33d Mon Sep 17 00:00:00 2001 From: Nate Watterson Date: Thu, 29 Aug 2024 15:34:37 -0700 Subject: [PATCH 08/28] iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV NVIDIA's Tegra241 Soc has a CMDQ-Virtualization (CMDQV) hardware, extending the standard ARM SMMU v3 IP to support multiple VCMDQs with virtualization capabilities. In terms of command queue, they are very like a standard SMMU CMDQ (or ECMDQs), but only support CS_NONE in the CS field of CMD_SYNC. Add a new tegra241-cmdqv driver, and insert its structure pointer into the existing arm_smmu_device, and then add related function calls in the SMMUv3 driver to interact with the CMDQV driver. In the CMDQV driver, add a minimal part for the in-kernel support: reserve VINTF0 for in-kernel use, and assign some of the VCMDQs to the VINTF0, and select one VCMDQ based on the current CPU ID to execute supported commands. This multi-queue design for in-kernel use gives some limited improvements: up to 20% reduction of invalidation time was measured by a multi-threaded DMA unmap benchmark, compared to a single queue. The other part of the CMDQV driver will be user-space support that gives a hypervisor running on the host OS to talk to the driver for virtualization use cases, allowing VMs to use VCMDQs without trappings, i.e. no VM Exits. This is designed based on IOMMUFD, and its RFC series is also under review. It will provide a guest OS a bigger improvement: 70% to 90% reductions of TLB invalidation time were measured by DMA unmap tests running in a guest, compared to nested SMMU CMDQ (with trappings). As the initial version, the CMDQV driver only supports ACPI configurations. Change-Id: Ib51b81f62fa115ce20dc542715493506d4d7f557 Signed-off-by: Nate Watterson Reviewed-by: Jason Gunthorpe Co-developed-by: Nicolin Chen Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/dce50490b2c10b7254fb36aa73ed7ffd812b283a.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435435 GVS: buildbot_gerritrpt Reviewed-by: Pritesh Raithatha --- MAINTAINERS | 1 + drivers/iommu/Kconfig | 12 + drivers/iommu/arm/arm-smmu-v3/Makefile | 1 + drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 33 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 11 + .../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 858 ++++++++++++++++++ 6 files changed, 915 insertions(+), 1 deletion(-) create mode 100644 drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c diff --git a/MAINTAINERS b/MAINTAINERS index c1c697cd6eb5..826e2f38db3a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -21625,6 +21625,7 @@ M: Thierry Reding R: Krishna Reddy L: linux-tegra@vger.kernel.org S: Supported +F: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c F: drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c F: drivers/iommu/tegra* diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 9dbb55e745bd..e0d5960e7753 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -406,6 +406,18 @@ config ARM_SMMU_V3_SVA Say Y here if your system supports SVA extensions such as PCIe PASID and PRI. + +config TEGRA241_CMDQV + bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3" + depends on ACPI + help + Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The + CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues + support, except with virtualization capabilities. + + Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same + CMDQ-V extension. + config S390_IOMMU def_bool y if S390 && PCI depends on S390 && PCI diff --git a/drivers/iommu/arm/arm-smmu-v3/Makefile b/drivers/iommu/arm/arm-smmu-v3/Makefile index 54feb1ecccad..8dff2bc4c7f3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/Makefile +++ b/drivers/iommu/arm/arm-smmu-v3/Makefile @@ -2,4 +2,5 @@ obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o arm_smmu_v3-objs-y += arm-smmu-v3.o arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o +arm_smmu_v3-objs-$(CONFIG_TEGRA241_CMDQV) += tegra241-cmdqv.o arm_smmu_v3-objs := $(arm_smmu_v3-objs-y) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 80369af5b9d4..20e594e67459 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3718,6 +3718,31 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) } #ifdef CONFIG_ACPI +#ifdef CONFIG_TEGRA241_CMDQV +static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node, + struct arm_smmu_device *smmu) +{ + const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier); + struct acpi_device *adev; + + /* Look for an NVDA200C node whose _UID matches the SMMU node ID */ + adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1); + if (adev) { + /* Tegra241 CMDQV driver is responsible for put_device() */ + smmu->impl_dev = &adev->dev; + smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV; + dev_info(smmu->dev, "found companion CMDQV device: %s\n", + dev_name(smmu->impl_dev)); + } + kfree(uid); +} +#else +static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node, + struct arm_smmu_device *smmu) +{ +} +#endif + static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node, struct arm_smmu_device *smmu) { @@ -3732,6 +3757,11 @@ static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node, smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH; break; case ACPI_IORT_SMMU_V3_GENERIC: + /* + * Tegra241 implementation stores its SMMU options and impl_dev + * in DSDT. Thus, go through the ACPI tables unconditionally. + */ + acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu); break; } @@ -3850,7 +3880,8 @@ static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu) struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV); int ret; - /* Add impl probe */ + if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV)) + new_smmu = tegra241_cmdqv_probe(smmu); if (new_smmu == ERR_PTR(-ENODEV)) return smmu; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 7693c7e95db5..233a4a57822b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -635,6 +635,7 @@ struct arm_smmu_impl_ops { /* An SMMUv3 instance */ struct arm_smmu_device { struct device *dev; + struct device *impl_dev; const struct arm_smmu_impl_ops *impl_ops; void __iomem *base; @@ -836,4 +837,14 @@ static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, { } #endif /* CONFIG_ARM_SMMU_V3_SVA */ + +#ifdef CONFIG_TEGRA241_CMDQV +struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu); +#else /* CONFIG_TEGRA241_CMDQV */ +static inline struct arm_smmu_device * +tegra241_cmdqv_probe(struct arm_smmu_device *smmu) +{ + return ERR_PTR(-ENODEV); +} +#endif /* CONFIG_TEGRA241_CMDQV */ #endif /* _ARM_SMMU_V3_H */ diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c new file mode 100644 index 000000000000..5ac3032ee6dd --- /dev/null +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -0,0 +1,858 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021-2024 NVIDIA CORPORATION & AFFILIATES. */ + +#define dev_fmt(fmt) "tegra241_cmdqv: " fmt + +#include +#include +#include +#include +#include +#include + +#include + +#include "arm-smmu-v3.h" + +/* CMDQV register page base and size defines */ +#define TEGRA241_CMDQV_CONFIG_BASE (0) +#define TEGRA241_CMDQV_CONFIG_SIZE (SZ_64K) +#define TEGRA241_VCMDQ_PAGE0_BASE (TEGRA241_CMDQV_CONFIG_BASE + SZ_64K) +#define TEGRA241_VCMDQ_PAGE1_BASE (TEGRA241_VCMDQ_PAGE0_BASE + SZ_64K) +#define TEGRA241_VINTF_PAGE_BASE (TEGRA241_VCMDQ_PAGE1_BASE + SZ_64K) + +/* CMDQV global base regs */ +#define TEGRA241_CMDQV_CONFIG 0x0000 +#define CMDQV_EN BIT(0) + +#define TEGRA241_CMDQV_PARAM 0x0004 +#define CMDQV_NUM_VINTF_LOG2 GENMASK(11, 8) +#define CMDQV_NUM_VCMDQ_LOG2 GENMASK(7, 4) + +#define TEGRA241_CMDQV_STATUS 0x0008 +#define CMDQV_ENABLED BIT(0) + +#define TEGRA241_CMDQV_VINTF_ERR_MAP 0x0014 +#define TEGRA241_CMDQV_VINTF_INT_MASK 0x001C +#define TEGRA241_CMDQV_CMDQ_ERR_MAP(m) (0x0024 + 0x4*(m)) + +#define TEGRA241_CMDQV_CMDQ_ALLOC(q) (0x0200 + 0x4*(q)) +#define CMDQV_CMDQ_ALLOC_VINTF GENMASK(20, 15) +#define CMDQV_CMDQ_ALLOC_LVCMDQ GENMASK(7, 1) +#define CMDQV_CMDQ_ALLOCATED BIT(0) + +/* VINTF base regs */ +#define TEGRA241_VINTF(v) (0x1000 + 0x100*(v)) + +#define TEGRA241_VINTF_CONFIG 0x0000 +#define VINTF_HYP_OWN BIT(17) +#define VINTF_VMID GENMASK(16, 1) +#define VINTF_EN BIT(0) + +#define TEGRA241_VINTF_STATUS 0x0004 +#define VINTF_STATUS GENMASK(3, 1) +#define VINTF_ENABLED BIT(0) + +#define TEGRA241_VINTF_LVCMDQ_ERR_MAP_64(m) \ + (0x00C0 + 0x8*(m)) +#define LVCMDQ_ERR_MAP_NUM_64 2 + +/* VCMDQ base regs */ +/* -- PAGE0 -- */ +#define TEGRA241_VCMDQ_PAGE0(q) (TEGRA241_VCMDQ_PAGE0_BASE + 0x80*(q)) + +#define TEGRA241_VCMDQ_CONS 0x00000 +#define VCMDQ_CONS_ERR GENMASK(30, 24) + +#define TEGRA241_VCMDQ_PROD 0x00004 + +#define TEGRA241_VCMDQ_CONFIG 0x00008 +#define VCMDQ_EN BIT(0) + +#define TEGRA241_VCMDQ_STATUS 0x0000C +#define VCMDQ_ENABLED BIT(0) + +#define TEGRA241_VCMDQ_GERROR 0x00010 +#define TEGRA241_VCMDQ_GERRORN 0x00014 + +/* -- PAGE1 -- */ +#define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q)) +#define VCMDQ_ADDR GENMASK(47, 5) +#define VCMDQ_LOG2SIZE GENMASK(4, 0) +#define VCMDQ_LOG2SIZE_MAX 19 + +#define TEGRA241_VCMDQ_BASE 0x00000 +#define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008 + +/* VINTF logical-VCMDQ pages */ +#define TEGRA241_VINTFi_PAGE0(i) (TEGRA241_VINTF_PAGE_BASE + SZ_128K*(i)) +#define TEGRA241_VINTFi_PAGE1(i) (TEGRA241_VINTFi_PAGE0(i) + SZ_64K) +#define TEGRA241_VINTFi_LVCMDQ_PAGE0(i, q) \ + (TEGRA241_VINTFi_PAGE0(i) + 0x80*(q)) +#define TEGRA241_VINTFi_LVCMDQ_PAGE1(i, q) \ + (TEGRA241_VINTFi_PAGE1(i) + 0x80*(q)) + +/* MMIO helpers */ +#define REG_CMDQV(_cmdqv, _regname) \ + ((_cmdqv)->base + TEGRA241_CMDQV_##_regname) +#define REG_VINTF(_vintf, _regname) \ + ((_vintf)->base + TEGRA241_VINTF_##_regname) +#define REG_VCMDQ_PAGE0(_vcmdq, _regname) \ + ((_vcmdq)->page0 + TEGRA241_VCMDQ_##_regname) +#define REG_VCMDQ_PAGE1(_vcmdq, _regname) \ + ((_vcmdq)->page1 + TEGRA241_VCMDQ_##_regname) + + +static bool disable_cmdqv; +module_param(disable_cmdqv, bool, 0444); +MODULE_PARM_DESC(disable_cmdqv, + "This allows to disable CMDQV HW and use default SMMU internal CMDQ."); + +static bool bypass_vcmdq; +module_param(bypass_vcmdq, bool, 0444); +MODULE_PARM_DESC(bypass_vcmdq, + "This allows to bypass VCMDQ for debugging use or perf comparison."); + +/** + * struct tegra241_vcmdq - Virtual Command Queue + * @idx: Global index in the CMDQV + * @lidx: Local index in the VINTF + * @enabled: Enable status + * @cmdqv: Parent CMDQV pointer + * @vintf: Parent VINTF pointer + * @cmdq: Command Queue struct + * @page0: MMIO Page0 base address + * @page1: MMIO Page1 base address + */ +struct tegra241_vcmdq { + u16 idx; + u16 lidx; + + bool enabled; + + struct tegra241_cmdqv *cmdqv; + struct tegra241_vintf *vintf; + struct arm_smmu_cmdq cmdq; + + void __iomem *page0; + void __iomem *page1; +}; + +/** + * struct tegra241_vintf - Virtual Interface + * @idx: Global index in the CMDQV + * @enabled: Enable status + * @cmdqv: Parent CMDQV pointer + * @lvcmdqs: List of logical VCMDQ pointers + * @base: MMIO base address + */ +struct tegra241_vintf { + u16 idx; + + bool enabled; + + struct tegra241_cmdqv *cmdqv; + struct tegra241_vcmdq **lvcmdqs; + + void __iomem *base; +}; + +/** + * struct tegra241_cmdqv - CMDQ-V for SMMUv3 + * @smmu: SMMUv3 device + * @dev: CMDQV device + * @base: MMIO base address + * @irq: IRQ number + * @num_vintfs: Total number of VINTFs + * @num_vcmdqs: Total number of VCMDQs + * @num_lvcmdqs_per_vintf: Number of logical VCMDQs per VINTF + * @vintf_ids: VINTF id allocator + * @vintfs: List of VINTFs + */ +struct tegra241_cmdqv { + struct arm_smmu_device smmu; + struct device *dev; + + void __iomem *base; + int irq; + + /* CMDQV Hardware Params */ + u16 num_vintfs; + u16 num_vcmdqs; + u16 num_lvcmdqs_per_vintf; + + struct ida vintf_ids; + + struct tegra241_vintf **vintfs; +}; + +/* Config and Polling Helpers */ + +static inline int tegra241_cmdqv_write_config(struct tegra241_cmdqv *cmdqv, + void __iomem *addr_config, + void __iomem *addr_status, + u32 regval, const char *header, + bool *out_enabled) +{ + bool en = regval & BIT(0); + int ret; + + writel(regval, addr_config); + ret = readl_poll_timeout(addr_status, regval, + en ? regval & BIT(0) : !(regval & BIT(0)), + 1, ARM_SMMU_POLL_TIMEOUT_US); + if (ret) + dev_err(cmdqv->dev, "%sfailed to %sable, STATUS=0x%08X\n", + header, en ? "en" : "dis", regval); + if (out_enabled) + WRITE_ONCE(*out_enabled, regval & BIT(0)); + return ret; +} + +static inline int cmdqv_write_config(struct tegra241_cmdqv *cmdqv, u32 regval) +{ + return tegra241_cmdqv_write_config(cmdqv, + REG_CMDQV(cmdqv, CONFIG), + REG_CMDQV(cmdqv, STATUS), + regval, "CMDQV: ", NULL); +} + +static inline int vintf_write_config(struct tegra241_vintf *vintf, u32 regval) +{ + char header[16]; + + snprintf(header, 16, "VINTF%u: ", vintf->idx); + return tegra241_cmdqv_write_config(vintf->cmdqv, + REG_VINTF(vintf, CONFIG), + REG_VINTF(vintf, STATUS), + regval, header, &vintf->enabled); +} + +static inline char *lvcmdq_error_header(struct tegra241_vcmdq *vcmdq, + char *header, int hlen) +{ + WARN_ON(hlen < 32); + if (WARN_ON(!vcmdq->vintf)) + return ""; + snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ", + vcmdq->vintf->idx, vcmdq->idx, vcmdq->lidx); + return header; +} + +static inline int vcmdq_write_config(struct tegra241_vcmdq *vcmdq, u32 regval) +{ + char header[32], *h = lvcmdq_error_header(vcmdq, header, 32); + + return tegra241_cmdqv_write_config(vcmdq->cmdqv, + REG_VCMDQ_PAGE0(vcmdq, CONFIG), + REG_VCMDQ_PAGE0(vcmdq, STATUS), + regval, h, &vcmdq->enabled); +} + +/* ISR Functions */ + +static void tegra241_vintf0_handle_error(struct tegra241_vintf *vintf) +{ + int i; + + for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++) { + u64 map = readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i))); + + while (map) { + unsigned long lidx = __ffs64(map); + struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; + u32 gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)); + + __arm_smmu_cmdq_skip_err(&vintf->cmdqv->smmu, &vcmdq->cmdq); + writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN)); + map &= ~BIT_ULL(lidx); + } + } +} + +static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid) +{ + struct tegra241_cmdqv *cmdqv = (struct tegra241_cmdqv *)devid; + void __iomem *reg_vintf_map = REG_CMDQV(cmdqv, VINTF_ERR_MAP); + char err_str[256]; + u64 vintf_map; + + /* Use readl_relaxed() as register addresses are not 64-bit aligned */ + vintf_map = (u64)readl_relaxed(reg_vintf_map + 0x4) << 32 | + (u64)readl_relaxed(reg_vintf_map); + + snprintf(err_str, sizeof(err_str), + "vintf_map: %016llx, vcmdq_map %08x:%08x:%08x:%08x", vintf_map, + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(3))), + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(2))), + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(1))), + readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(0)))); + + dev_warn(cmdqv->dev, "unexpected error reported. %s\n", err_str); + + /* Handle VINTF0 and its LVCMDQs */ + if (vintf_map & BIT_ULL(0)) { + tegra241_vintf0_handle_error(cmdqv->vintfs[0]); + vintf_map &= ~BIT_ULL(0); + } + + return IRQ_HANDLED; +} + +/* Command Queue Function */ + +static struct arm_smmu_cmdq * +tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + struct tegra241_vintf *vintf = cmdqv->vintfs[0]; + struct tegra241_vcmdq *vcmdq; + u16 lidx; + + if (READ_ONCE(bypass_vcmdq)) + return NULL; + + /* Use SMMU CMDQ if VINTF0 is uninitialized */ + if (!READ_ONCE(vintf->enabled)) + return NULL; + + /* + * Select a LVCMDQ to use. Here we use a temporal solution to + * balance out traffic on cmdq issuing: each cmdq has its own + * lock, if all cpus issue cmdlist using the same cmdq, only + * one CPU at a time can enter the process, while the others + * will be spinning at the same lock. + */ + lidx = smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf; + vcmdq = vintf->lvcmdqs[lidx]; + if (!vcmdq || !READ_ONCE(vcmdq->enabled)) + return NULL; + return &vcmdq->cmdq; +} + +/* HW Reset Functions */ + +static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) +{ + char header[32], *h = lvcmdq_error_header(vcmdq, header, 32); + u32 gerrorn, gerror; + + if (vcmdq_write_config(vcmdq, 0)) { + dev_err(vcmdq->cmdqv->dev, + "%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h, + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS))); + } + writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, PROD)); + writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, CONS)); + writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE)); + writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, CONS_INDX_BASE)); + + gerrorn = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)); + gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)); + if (gerror != gerrorn) { + dev_warn(vcmdq->cmdqv->dev, + "%suncleared error detected, resetting\n", h); + writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN)); + } + + dev_dbg(vcmdq->cmdqv->dev, "%sdeinited\n", h); +} + +static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq) +{ + char header[32], *h = lvcmdq_error_header(vcmdq, header, 32); + int ret; + + /* Reset VCMDQ */ + tegra241_vcmdq_hw_deinit(vcmdq); + + /* Configure and enable VCMDQ */ + writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE)); + + ret = vcmdq_write_config(vcmdq, VCMDQ_EN); + if (ret) { + dev_err(vcmdq->cmdqv->dev, + "%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h, + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)), + readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS))); + return ret; + } + + dev_dbg(vcmdq->cmdqv->dev, "%sinited\n", h); + return 0; +} + +static void tegra241_vintf_hw_deinit(struct tegra241_vintf *vintf) +{ + u16 lidx; + + for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) + if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) + tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]); + vintf_write_config(vintf, 0); +} + +static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own) +{ + u32 regval; + u16 lidx; + int ret; + + /* Reset VINTF */ + tegra241_vintf_hw_deinit(vintf); + + /* Configure and enable VINTF */ + regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own); + writel(regval, REG_VINTF(vintf, CONFIG)); + + ret = vintf_write_config(vintf, regval | VINTF_EN); + if (ret) + return ret; + + for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) { + if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) { + ret = tegra241_vcmdq_hw_init(vintf->lvcmdqs[lidx]); + if (ret) { + tegra241_vintf_hw_deinit(vintf); + return ret; + } + } + } + + return 0; +} + +static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + u16 qidx, lidx, idx; + u32 regval; + int ret; + + /* Reset CMDQV */ + regval = readl_relaxed(REG_CMDQV(cmdqv, CONFIG)); + ret = cmdqv_write_config(cmdqv, regval & ~CMDQV_EN); + if (ret) + return ret; + ret = cmdqv_write_config(cmdqv, regval | CMDQV_EN); + if (ret) + return ret; + + /* Assign preallocated global VCMDQs to each VINTF as LVCMDQs */ + for (idx = 0, qidx = 0; idx < cmdqv->num_vintfs; idx++) { + for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { + regval = FIELD_PREP(CMDQV_CMDQ_ALLOC_VINTF, idx); + regval |= FIELD_PREP(CMDQV_CMDQ_ALLOC_LVCMDQ, lidx); + regval |= CMDQV_CMDQ_ALLOCATED; + writel_relaxed(regval, + REG_CMDQV(cmdqv, CMDQ_ALLOC(qidx++))); + } + } + + return tegra241_vintf_hw_init(cmdqv->vintfs[0], true); +} + +/* VCMDQ Resource Helpers */ + +static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq) +{ + struct arm_smmu_queue *q = &vcmdq->cmdq.q; + size_t nents = 1 << q->llq.max_n_shift; + size_t qsz = nents << CMDQ_ENT_SZ_SHIFT; + + if (!q->base) + return; + dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma); +} + +static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) +{ + struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu; + struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq; + struct arm_smmu_queue *q = &cmdq->q; + char name[16]; + int ret; + + snprintf(name, 16, "vcmdq%u", vcmdq->idx); + + q->llq.max_n_shift = VCMDQ_LOG2SIZE_MAX; + + /* Use the common helper to init the VCMDQ, and then... */ + ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0, + TEGRA241_VCMDQ_PROD, TEGRA241_VCMDQ_CONS, + CMDQ_ENT_DWORDS, name); + if (ret) + return ret; + + /* ...override q_base to write VCMDQ_BASE registers */ + q->q_base = q->base_dma & VCMDQ_ADDR; + q->q_base |= FIELD_PREP(VCMDQ_LOG2SIZE, q->llq.max_n_shift); + + return arm_smmu_cmdq_init(smmu, cmdq); +} + +/* VINTF Logical VCMDQ Resource Helpers */ + +static void tegra241_vintf_deinit_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + vintf->lvcmdqs[lidx] = NULL; +} + +static int tegra241_vintf_init_lvcmdq(struct tegra241_vintf *vintf, u16 lidx, + struct tegra241_vcmdq *vcmdq) +{ + struct tegra241_cmdqv *cmdqv = vintf->cmdqv; + u16 idx = vintf->idx; + + vcmdq->idx = idx * cmdqv->num_lvcmdqs_per_vintf + lidx; + vcmdq->lidx = lidx; + vcmdq->cmdqv = cmdqv; + vcmdq->vintf = vintf; + vcmdq->page0 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE0(idx, lidx); + vcmdq->page1 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE1(idx, lidx); + + vintf->lvcmdqs[lidx] = vcmdq; + return 0; +} + +static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; + char header[32]; + + tegra241_vcmdq_free_smmu_cmdq(vcmdq); + tegra241_vintf_deinit_lvcmdq(vintf, lidx); + + dev_dbg(vintf->cmdqv->dev, + "%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 32)); + kfree(vcmdq); +} + +static struct tegra241_vcmdq * +tegra241_vintf_alloc_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + struct tegra241_cmdqv *cmdqv = vintf->cmdqv; + struct tegra241_vcmdq *vcmdq; + char header[32]; + int ret; + + vcmdq = kzalloc(sizeof(*vcmdq), GFP_KERNEL); + if (!vcmdq) + return ERR_PTR(-ENOMEM); + + ret = tegra241_vintf_init_lvcmdq(vintf, lidx, vcmdq); + if (ret) + goto free_vcmdq; + + /* Build an arm_smmu_cmdq for each LVCMDQ */ + ret = tegra241_vcmdq_alloc_smmu_cmdq(vcmdq); + if (ret) + goto deinit_lvcmdq; + + dev_dbg(cmdqv->dev, + "%sallocated\n", lvcmdq_error_header(vcmdq, header, 32)); + return vcmdq; + +deinit_lvcmdq: + tegra241_vintf_deinit_lvcmdq(vintf, lidx); +free_vcmdq: + kfree(vcmdq); + return ERR_PTR(ret); +} + +/* VINTF Resource Helpers */ + +static void tegra241_cmdqv_deinit_vintf(struct tegra241_cmdqv *cmdqv, u16 idx) +{ + kfree(cmdqv->vintfs[idx]->lvcmdqs); + ida_free(&cmdqv->vintf_ids, idx); + cmdqv->vintfs[idx] = NULL; +} + +static int tegra241_cmdqv_init_vintf(struct tegra241_cmdqv *cmdqv, u16 max_idx, + struct tegra241_vintf *vintf) +{ + + u16 idx; + int ret; + + ret = ida_alloc_max(&cmdqv->vintf_ids, max_idx, GFP_KERNEL); + if (ret < 0) + return ret; + idx = ret; + + vintf->idx = idx; + vintf->cmdqv = cmdqv; + vintf->base = cmdqv->base + TEGRA241_VINTF(idx); + + vintf->lvcmdqs = kcalloc(cmdqv->num_lvcmdqs_per_vintf, + sizeof(*vintf->lvcmdqs), GFP_KERNEL); + if (!vintf->lvcmdqs) { + ida_free(&cmdqv->vintf_ids, idx); + return -ENOMEM; + } + + cmdqv->vintfs[idx] = vintf; + return ret; +} + +/* Remove Helpers */ + +static void tegra241_vintf_remove_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) +{ + tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]); + tegra241_vintf_free_lvcmdq(vintf, lidx); +} + +static void tegra241_cmdqv_remove_vintf(struct tegra241_cmdqv *cmdqv, u16 idx) +{ + struct tegra241_vintf *vintf = cmdqv->vintfs[idx]; + u16 lidx; + + /* Remove LVCMDQ resources */ + for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) + if (vintf->lvcmdqs[lidx]) + tegra241_vintf_remove_lvcmdq(vintf, lidx); + + /* Remove VINTF resources */ + tegra241_vintf_hw_deinit(vintf); + + dev_dbg(cmdqv->dev, "VINTF%u: deallocated\n", vintf->idx); + tegra241_cmdqv_deinit_vintf(cmdqv, idx); + kfree(vintf); +} + +static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + u16 idx; + + /* Remove VINTF resources */ + for (idx = 0; idx < cmdqv->num_vintfs; idx++) { + if (cmdqv->vintfs[idx]) { + /* Only vintf0 should remain at this stage */ + WARN_ON(idx > 0); + tegra241_cmdqv_remove_vintf(cmdqv, idx); + } + } + + /* Remove cmdqv resources */ + ida_destroy(&cmdqv->vintf_ids); + + if (cmdqv->irq > 0) + free_irq(cmdqv->irq, cmdqv); + iounmap(cmdqv->base); + kfree(cmdqv->vintfs); + put_device(cmdqv->dev); /* smmu->impl_dev */ +} + +static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = { + .get_secondary_cmdq = tegra241_cmdqv_get_cmdq, + .device_reset = tegra241_cmdqv_hw_reset, + .device_remove = tegra241_cmdqv_remove, +}; + +/* Probe Functions */ + +static int tegra241_cmdqv_acpi_is_memory(struct acpi_resource *res, void *data) +{ + struct resource_win win; + + return !acpi_dev_resource_address_space(res, &win); +} + +static int tegra241_cmdqv_acpi_get_irqs(struct acpi_resource *ares, void *data) +{ + struct resource r; + int *irq = data; + + if (*irq <= 0 && acpi_dev_resource_interrupt(ares, 0, &r)) + *irq = r.start; + return 1; /* No need to add resource to the list */ +} + +static struct resource * +tegra241_cmdqv_find_acpi_resource(struct device *dev, int *irq) +{ + struct acpi_device *adev = to_acpi_device(dev); + struct list_head resource_list; + struct resource_entry *rentry; + struct resource *res = NULL; + int ret; + + INIT_LIST_HEAD(&resource_list); + ret = acpi_dev_get_resources(adev, &resource_list, + tegra241_cmdqv_acpi_is_memory, NULL); + if (ret < 0) { + dev_err(dev, "failed to get memory resource: %d\n", ret); + return NULL; + } + + rentry = list_first_entry_or_null(&resource_list, + struct resource_entry, node); + if (!rentry) { + dev_err(dev, "failed to get memory resource entry\n"); + goto free_list; + } + + /* Caller must free the res */ + res = kzalloc(sizeof(*res), GFP_KERNEL); + if (!res) + goto free_list; + + *res = *rentry->res; + + acpi_dev_free_resource_list(&resource_list); + + INIT_LIST_HEAD(&resource_list); + + if (irq) + ret = acpi_dev_get_resources(adev, &resource_list, + tegra241_cmdqv_acpi_get_irqs, irq); + if (ret < 0 || !irq || *irq <= 0) + dev_warn(dev, "no interrupt. errors will not be reported\n"); + +free_list: + acpi_dev_free_resource_list(&resource_list); + return res; +} + +struct dentry *cmdqv_debugfs_dir; + +static struct arm_smmu_device * +__tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, + int irq) +{ + static struct arm_smmu_device *new_smmu; + struct tegra241_cmdqv *cmdqv = NULL; + struct tegra241_vintf *vintf; + void __iomem *base; + u32 regval; + int lidx; + int ret; + + static_assert(offsetof(struct tegra241_cmdqv, smmu) == 0); + + base = ioremap(res->start, resource_size(res)); + if (IS_ERR(base)) { + dev_err(smmu->dev, "failed to ioremap: %ld\n", PTR_ERR(base)); + goto iounmap; + } + + regval = readl(base + TEGRA241_CMDQV_CONFIG); + if (disable_cmdqv) { + dev_info(smmu->dev, "Detected disable_cmdqv=true\n"); + writel(regval & ~CMDQV_EN, base + TEGRA241_CMDQV_CONFIG); + goto iounmap; + } + + cmdqv = devm_krealloc(smmu->dev, smmu, sizeof(*cmdqv), GFP_KERNEL); + if (!cmdqv) + goto iounmap; + new_smmu = &cmdqv->smmu; + + cmdqv->irq = irq; + cmdqv->base = base; + cmdqv->dev = smmu->impl_dev; + + if (cmdqv->irq > 0) { + ret = request_irq(irq, tegra241_cmdqv_isr, 0, "tegra241-cmdqv", + cmdqv); + if (ret) { + dev_err(cmdqv->dev, "failed to request irq (%d): %d\n", + cmdqv->irq, ret); + goto iounmap; + } + } + + regval = readl_relaxed(REG_CMDQV(cmdqv, PARAM)); + cmdqv->num_vintfs = 1 << FIELD_GET(CMDQV_NUM_VINTF_LOG2, regval); + cmdqv->num_vcmdqs = 1 << FIELD_GET(CMDQV_NUM_VCMDQ_LOG2, regval); + cmdqv->num_lvcmdqs_per_vintf = cmdqv->num_vcmdqs / cmdqv->num_vintfs; + + cmdqv->vintfs = + kcalloc(cmdqv->num_vintfs, sizeof(*cmdqv->vintfs), GFP_KERNEL); + if (!cmdqv->vintfs) + goto free_irq; + + ida_init(&cmdqv->vintf_ids); + + vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); + if (!vintf) + goto destroy_ids; + + /* Init VINTF0 for in-kernel use */ + ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); + if (ret) { + dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); + goto free_vintf; + } + + /* Preallocate logical VCMDQs to VINTF0 */ + for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { + struct tegra241_vcmdq *vcmdq; + + vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); + if (IS_ERR(vcmdq)) + goto free_lvcmdq; + } + +#ifdef CONFIG_IOMMU_DEBUGFS + if (!cmdqv_debugfs_dir) { + cmdqv_debugfs_dir = + debugfs_create_dir("tegra241_cmdqv", iommu_debugfs_dir); + debugfs_create_bool("bypass_vcmdq", 0644, cmdqv_debugfs_dir, + &bypass_vcmdq); + } +#endif + + new_smmu->impl_ops = &tegra241_cmdqv_impl_ops; + + return new_smmu; + +free_lvcmdq: + for (lidx--; lidx >= 0; lidx--) + tegra241_vintf_free_lvcmdq(vintf, lidx); + tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); +free_vintf: + kfree(vintf); +destroy_ids: + ida_destroy(&cmdqv->vintf_ids); + kfree(cmdqv->vintfs); +free_irq: + if (cmdqv->irq > 0) + free_irq(cmdqv->irq, cmdqv); +iounmap: + iounmap(base); + return NULL; +} + +struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu) +{ + struct arm_smmu_device *new_smmu; + struct resource *res = NULL; + int irq; + + if (!smmu->dev->of_node) + res = tegra241_cmdqv_find_acpi_resource(smmu->impl_dev, &irq); + if (!res) + goto out_fallback; + + new_smmu = __tegra241_cmdqv_probe(smmu, res, irq); + kfree(res); + + if (new_smmu) + return new_smmu; + +out_fallback: + dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n"); + smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV; + put_device(smmu->impl_dev); + return ERR_PTR(-ENODEV); +} From c6f6edb534a63cb2887980845fdc30bacf213a1c Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Aug 2024 15:34:38 -0700 Subject: [PATCH 09/28] iommu/arm-smmu-v3: Start a new batch if new command is not supported The VCMDQ in the tegra241-cmdqv driver has a guest mode that supports only a few invalidation commands. A batch is initialized with a cmdq, so it has to confirm whether a new command is supported or not. Add a supports_cmd function pointer to the cmdq structure, where the vcmdq driver should hook a command scan function. Add an inline helper too so it can be used by both sides. If a new command is not supported, simply issue the existing batch and re- init it as a new batch. Change-Id: I0b8890b4e0c2df2a2824a3950ee23bb885167a40 Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/aafb24b881504f18c5d0c7c15f2134e40ad2c486.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435436 GVS: buildbot_gerritrpt Reviewed-by: Pritesh Raithatha --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 6 ++++-- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 7 +++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 20e594e67459..eed59ac5d6bf 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -922,10 +922,12 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_batch *cmds, struct arm_smmu_cmdq_ent *cmd) { + bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd); + bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) && + (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC); int index; - if (cmds->num == CMDQ_BATCH_ENTRIES - 1 && - (smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) { + if (force_sync || unsupported_cmd) { arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, cmds->num, true); arm_smmu_cmdq_batch_init(smmu, cmds); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 233a4a57822b..df476f369f87 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -557,8 +557,15 @@ struct arm_smmu_cmdq { atomic_long_t *valid_map; atomic_t owner_prod; atomic_t lock; + bool (*supports_cmd)(struct arm_smmu_cmdq_ent *ent); }; +static inline bool arm_smmu_cmdq_supports_cmd(struct arm_smmu_cmdq *cmdq, + struct arm_smmu_cmdq_ent *ent) +{ + return cmdq->supports_cmd ? cmdq->supports_cmd(ent) : true; +} + struct arm_smmu_cmdq_batch { u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS]; struct arm_smmu_cmdq *cmdq; From 009e96a7b5852fa3f04da6fc9fd0d8252e36968e Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 29 Aug 2024 15:34:39 -0700 Subject: [PATCH 10/28] iommu/tegra241-cmdqv: Limit CMDs for VCMDQs of a guest owned VINTF When VCMDQs are assigned to a VINTF owned by a guest (HYP_OWN bit unset), only TLB and ATC invalidation commands are supported by the VCMDQ HW. So, implement the new cmdq->supports_cmd op to scan the input cmd in order to make sure that it is supported by the selected queue. Note that the guest VM shouldn't have HYP_OWN bit being set regardless of guest kernel driver writing it or not, i.e. the hypervisor running in the host OS should wire this bit to zero when trapping a write access to this VINTF_CONFIG register from a guest kernel. Reviewed-by: Jason Gunthorpe Change-Id: Ic3375566b85f6f75796d23a483897f4e2c83c7fa Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/8160292337059b91271045800e5c62f7295e2c24.1724970714.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435437 GVS: buildbot_gerritrpt Reviewed-by: Pritesh Raithatha --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 28 ++++++++------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 3 +- .../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 34 ++++++++++++++++++- 3 files changed, 51 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index eed59ac5d6bf..25d377541171 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -345,12 +345,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) return 0; } -static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) +static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_ent *ent) { struct arm_smmu_cmdq *cmdq = NULL; if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq) - cmdq = smmu->impl_ops->get_secondary_cmdq(smmu); + cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent); return cmdq ?: &smmu->cmdq; } @@ -896,7 +897,7 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, } return arm_smmu_cmdq_issue_cmdlist( - smmu, arm_smmu_get_cmdq(smmu), cmd, 1, sync); + smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync); } static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, @@ -912,10 +913,11 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu, } static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu, - struct arm_smmu_cmdq_batch *cmds) + struct arm_smmu_cmdq_batch *cmds, + struct arm_smmu_cmdq_ent *ent) { cmds->num = 0; - cmds->cmdq = arm_smmu_get_cmdq(smmu); + cmds->cmdq = arm_smmu_get_cmdq(smmu, ent); } static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, @@ -930,13 +932,13 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, if (force_sync || unsupported_cmd) { arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, cmds->num, true); - arm_smmu_cmdq_batch_init(smmu, cmds); + arm_smmu_cmdq_batch_init(smmu, cmds, cmd); } if (cmds->num == CMDQ_BATCH_ENTRIES) { arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds, cmds->num, false); - arm_smmu_cmdq_batch_init(smmu, cmds); + arm_smmu_cmdq_batch_init(smmu, cmds, cmd); } index = cmds->num * CMDQ_ENT_DWORDS; @@ -1021,7 +1023,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master, }, }; - arm_smmu_cmdq_batch_init(smmu, &cmds); + arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd); for (i = 0; i < master->num_streams; i++) { cmd.cfgi.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); @@ -1822,7 +1824,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); - arm_smmu_cmdq_batch_init(master->smmu, &cmds); + arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd); for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd); @@ -1836,7 +1838,9 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, { int i; unsigned long flags; - struct arm_smmu_cmdq_ent cmd; + struct arm_smmu_cmdq_ent cmd = { + .opcode = CMDQ_OP_ATC_INV, + }; struct arm_smmu_master *master; struct arm_smmu_cmdq_batch cmds; @@ -1862,7 +1866,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd); - arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds); + arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd); spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_for_each_entry(master, &smmu_domain->devices, domain_head) { @@ -1939,7 +1943,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, num_pages++; } - arm_smmu_cmdq_batch_init(smmu, &cmds); + arm_smmu_cmdq_batch_init(smmu, &cmds, cmd); while (iova < end) { if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index df476f369f87..8dcfa37d3653 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -636,7 +636,8 @@ struct arm_smmu_strtab_cfg { struct arm_smmu_impl_ops { int (*device_reset)(struct arm_smmu_device *smmu); void (*device_remove)(struct arm_smmu_device *smmu); - struct arm_smmu_cmdq *(*get_secondary_cmdq)(struct arm_smmu_device *smmu); + struct arm_smmu_cmdq *(*get_secondary_cmdq)( + struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent); }; /* An SMMUv3 instance */ diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 5ac3032ee6dd..9eb9d959f3e5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -142,6 +142,7 @@ struct tegra241_vcmdq { * struct tegra241_vintf - Virtual Interface * @idx: Global index in the CMDQV * @enabled: Enable status + * @hyp_own: Owned by hypervisor (in-kernel) * @cmdqv: Parent CMDQV pointer * @lvcmdqs: List of logical VCMDQ pointers * @base: MMIO base address @@ -150,6 +151,7 @@ struct tegra241_vintf { u16 idx; bool enabled; + bool hyp_own; struct tegra241_cmdqv *cmdqv; struct tegra241_vcmdq **lvcmdqs; @@ -301,8 +303,21 @@ static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid) /* Command Queue Function */ +static bool tegra241_guest_vcmdq_supports_cmd(struct arm_smmu_cmdq_ent *ent) +{ + switch (ent->opcode) { + case CMDQ_OP_TLBI_NH_ASID: + case CMDQ_OP_TLBI_NH_VA: + case CMDQ_OP_ATC_INV: + return true; + default: + return false; + } +} + static struct arm_smmu_cmdq * -tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu) +tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_ent *ent) { struct tegra241_cmdqv *cmdqv = container_of(smmu, struct tegra241_cmdqv, smmu); @@ -328,6 +343,10 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu) vcmdq = vintf->lvcmdqs[lidx]; if (!vcmdq || !READ_ONCE(vcmdq->enabled)) return NULL; + + /* Unsupported CMD goes for smmu->cmdq pathway */ + if (!arm_smmu_cmdq_supports_cmd(&vcmdq->cmdq, ent)) + return NULL; return &vcmdq->cmdq; } @@ -406,12 +425,22 @@ static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own) tegra241_vintf_hw_deinit(vintf); /* Configure and enable VINTF */ + /* + * Note that HYP_OWN bit is wired to zero when running in guest kernel, + * whether enabling it here or not, as !HYP_OWN cmdq HWs only support a + * restricted set of supported commands. + */ regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own); writel(regval, REG_VINTF(vintf, CONFIG)); ret = vintf_write_config(vintf, regval | VINTF_EN); if (ret) return ret; + /* + * As being mentioned above, HYP_OWN bit is wired to zero for a guest + * kernel, so read it back from HW to ensure that reflects in hyp_own + */ + vintf->hyp_own = !!(VINTF_HYP_OWN & readl(REG_VINTF(vintf, CONFIG))); for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) { if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) { @@ -493,6 +522,9 @@ static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) q->q_base = q->base_dma & VCMDQ_ADDR; q->q_base |= FIELD_PREP(VCMDQ_LOG2SIZE, q->llq.max_n_shift); + if (!vcmdq->vintf->hyp_own) + cmdq->supports_cmd = tegra241_guest_vcmdq_supports_cmd; + return arm_smmu_cmdq_init(smmu, cmdq); } From 3c1a5006e3899da25022a21ea1b897397bf34902 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 1 Sep 2024 22:57:45 -0700 Subject: [PATCH 11/28] iommu/tegra241-cmdqv: Fix -Wformat-truncation warnings in lvcmdq_error_header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kernel test robot reported a few trucation warnings at the snprintf: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c: In function ‘tegra241_vintf_free_lvcmdq’: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:239:56: warning: ‘%u’ directive output may be truncated writing between 1 and 5 bytes into a region of size between 3 and 11 [-Wformat-truncation=] 239 | snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ", | ^~ drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:239:32: note: directive argument in the range [0, 65535] 239 | snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:239:9: note: ‘snprintf’ output between 25 and 37 bytes into a destination of size 32 239 | snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ", | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 240 | vcmdq->vintf->idx, vcmdq->idx, vcmdq->lidx); Fix by bumping up the size of the header to hold more characters. Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202409020406.7ed5uojF-lkp@intel.com/ Change-Id: I4329a30cc73b2333eaa6812a5c5e4cd52619549a Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20240902055745.629456-1-nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435438 Reviewed-by: Pritesh Raithatha GVS: buildbot_gerritrpt --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 9eb9d959f3e5..03fd13c21dcc 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -233,7 +233,7 @@ static inline int vintf_write_config(struct tegra241_vintf *vintf, u32 regval) static inline char *lvcmdq_error_header(struct tegra241_vcmdq *vcmdq, char *header, int hlen) { - WARN_ON(hlen < 32); + WARN_ON(hlen < 64); if (WARN_ON(!vcmdq->vintf)) return ""; snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ", @@ -243,7 +243,7 @@ static inline char *lvcmdq_error_header(struct tegra241_vcmdq *vcmdq, static inline int vcmdq_write_config(struct tegra241_vcmdq *vcmdq, u32 regval) { - char header[32], *h = lvcmdq_error_header(vcmdq, header, 32); + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); return tegra241_cmdqv_write_config(vcmdq->cmdqv, REG_VCMDQ_PAGE0(vcmdq, CONFIG), @@ -354,7 +354,7 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu, static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) { - char header[32], *h = lvcmdq_error_header(vcmdq, header, 32); + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); u32 gerrorn, gerror; if (vcmdq_write_config(vcmdq, 0)) { @@ -382,7 +382,7 @@ static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq) { - char header[32], *h = lvcmdq_error_header(vcmdq, header, 32); + char header[64], *h = lvcmdq_error_header(vcmdq, header, 64); int ret; /* Reset VCMDQ */ @@ -555,13 +555,13 @@ static int tegra241_vintf_init_lvcmdq(struct tegra241_vintf *vintf, u16 lidx, static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) { struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; - char header[32]; + char header[64]; tegra241_vcmdq_free_smmu_cmdq(vcmdq); tegra241_vintf_deinit_lvcmdq(vintf, lidx); dev_dbg(vintf->cmdqv->dev, - "%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 32)); + "%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 64)); kfree(vcmdq); } @@ -570,7 +570,7 @@ tegra241_vintf_alloc_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) { struct tegra241_cmdqv *cmdqv = vintf->cmdqv; struct tegra241_vcmdq *vcmdq; - char header[32]; + char header[64]; int ret; vcmdq = kzalloc(sizeof(*vcmdq), GFP_KERNEL); @@ -587,7 +587,7 @@ tegra241_vintf_alloc_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) goto deinit_lvcmdq; dev_dbg(cmdqv->dev, - "%sallocated\n", lvcmdq_error_header(vcmdq, header, 32)); + "%sallocated\n", lvcmdq_error_header(vcmdq, header, 64)); return vcmdq; deinit_lvcmdq: From b9be69606e724e2eeb393b1ac6b412596d206205 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 4 Sep 2024 11:02:43 +0300 Subject: [PATCH 12/28] iommu/tegra241-cmdqv: Fix ioremap() error handling in probe() The ioremap() function doesn't return error pointers, it returns NULL on error so update the error handling. Also just return directly instead of calling iounmap() on the NULL pointer. Calling iounmap(NULL) doesn't cause a problem on ARM but on other architectures it can trigger a warning so it'a bad habbit. Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV") Change-Id: I2ffed2cf23db0632c47f667a4b35584c4d181b7a Signed-off-by: Dan Carpenter Reviewed-by: Nicolin Chen Link: https://lore.kernel.org/r/5a6c1e9a-0724-41b1-86d4-36335d3768ea@stanley.mountain Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435439 GVS: buildbot_gerritrpt Reviewed-by: Pritesh Raithatha --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 03fd13c21dcc..240b54192177 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -772,9 +772,9 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, static_assert(offsetof(struct tegra241_cmdqv, smmu) == 0); base = ioremap(res->start, resource_size(res)); - if (IS_ERR(base)) { - dev_err(smmu->dev, "failed to ioremap: %ld\n", PTR_ERR(base)); - goto iounmap; + if (!base) { + dev_err(smmu->dev, "failed to ioremap\n"); + return NULL; } regval = readl(base + TEGRA241_CMDQV_CONFIG); From 98f97f6df72e38ef892f102c81adc02a31be65c0 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 4 Sep 2024 19:40:42 -0700 Subject: [PATCH 13/28] iommu/tegra241-cmdqv: Drop static at local variable This is likely a typo. Drop it. Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV") Change-Id: I903366a8fa224ae3542dd1b592c8e51be9c4cb90 Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/13fd3accb5b7ed6ec11cc6b7435f79f84af9f45f.1725503154.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435440 GVS: buildbot_gerritrpt Reviewed-by: Pritesh Raithatha --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 240b54192177..0766dc2789cb 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -761,8 +761,8 @@ static struct arm_smmu_device * __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, int irq) { - static struct arm_smmu_device *new_smmu; struct tegra241_cmdqv *cmdqv = NULL; + struct arm_smmu_device *new_smmu; struct tegra241_vintf *vintf; void __iomem *base; u32 regval; From 737d405b11bc8078e57cdd0f216aa13234860bf1 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 4 Sep 2024 19:40:43 -0700 Subject: [PATCH 14/28] iommu/tegra241-cmdqv: Do not allocate vcmdq until dma_set_mask_and_coherent It's observed that, when the first 4GB of system memory was reserved, all VCMDQ allocations failed (even with the smallest qsz in the last attempt): arm-smmu-v3: found companion CMDQV device: NVDA200C:00 arm-smmu-v3: option mask 0x10 arm-smmu-v3: failed to allocate queue (0x8000 bytes) for vcmdq0 acpi NVDA200C:00: tegra241_cmdqv: Falling back to standard SMMU CMDQ arm-smmu-v3: ias 48-bit, oas 48-bit (features 0x001e1fbf) arm-smmu-v3: allocated 524288 entries for cmdq arm-smmu-v3: allocated 524288 entries for evtq arm-smmu-v3: allocated 524288 entries for priq This is because the 4GB reserved memory shifted the entire DMA zone from a lower 32-bit range (on a system without the 4GB carveout) to higher range, while the dev->coherent_dma_mask was set to DMA_BIT_MASK(32) by default. The dma_set_mask_and_coherent() call is done in arm_smmu_device_hw_probe() of the SMMU driver. So any DMA allocation from tegra241_cmdqv_probe() must wait until the coherent_dma_mask is correctly set. Move the vintf/vcmdq structure initialization routine into a different op, "init_structures". Call it at the end of arm_smmu_init_structures(), where standard SMMU queues get allocated. Most of the impl_ops aren't ready until vintf/vcmdq structure are init-ed. So replace the full impl_ops with an init_ops in __tegra241_cmdqv_probe(). And switch to tegra241_cmdqv_impl_ops later in arm_smmu_init_structures(). Note that tegra241_cmdqv_impl_ops does not link to the new init_structures op after this switch, since there is no point in having it once it's done. Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV") Reported-by: Matt Ochs Change-Id: Ie7725620055e79702ce2b1bfb3beef624602ccd1 Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/530993c3aafa1b0fc3d879b8119e13c629d12e2b.1725503154.git.nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435441 Reviewed-by: Pritesh Raithatha GVS: buildbot_gerritrpt --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 9 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 + .../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 83 ++++++++++++------- 3 files changed, 60 insertions(+), 33 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 25d377541171..77c0cc385295 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3126,7 +3126,14 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu) if (ret) return ret; - return arm_smmu_init_strtab(smmu); + ret = arm_smmu_init_strtab(smmu); + if (ret) + return ret; + + if (smmu->impl_ops && smmu->impl_ops->init_structures) + return smmu->impl_ops->init_structures(smmu); + + return 0; } static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 8dcfa37d3653..1bd34ee6ef7e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -636,6 +636,7 @@ struct arm_smmu_strtab_cfg { struct arm_smmu_impl_ops { int (*device_reset)(struct arm_smmu_device *smmu); void (*device_remove)(struct arm_smmu_device *smmu); + int (*init_structures)(struct arm_smmu_device *smmu); struct arm_smmu_cmdq *(*get_secondary_cmdq)( struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent); }; diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 0766dc2789cb..fcd13d301fff 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -755,18 +755,65 @@ free_list: return res; } +static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) +{ + struct tegra241_cmdqv *cmdqv = + container_of(smmu, struct tegra241_cmdqv, smmu); + struct tegra241_vintf *vintf; + int lidx; + int ret; + + vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); + if (!vintf) + goto out_fallback; + + /* Init VINTF0 for in-kernel use */ + ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); + if (ret) { + dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); + goto free_vintf; + } + + /* Preallocate logical VCMDQs to VINTF0 */ + for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { + struct tegra241_vcmdq *vcmdq; + + vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); + if (IS_ERR(vcmdq)) + goto free_lvcmdq; + } + + /* Now, we are ready to run all the impl ops */ + smmu->impl_ops = &tegra241_cmdqv_impl_ops; + return 0; + +free_lvcmdq: + for (lidx--; lidx >= 0; lidx--) + tegra241_vintf_free_lvcmdq(vintf, lidx); + tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); +free_vintf: + kfree(vintf); +out_fallback: + dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n"); + smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV; + tegra241_cmdqv_remove(smmu); + return 0; +} + struct dentry *cmdqv_debugfs_dir; static struct arm_smmu_device * __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, int irq) { + static const struct arm_smmu_impl_ops init_ops = { + .init_structures = tegra241_cmdqv_init_structures, + .device_remove = tegra241_cmdqv_remove, + }; struct tegra241_cmdqv *cmdqv = NULL; struct arm_smmu_device *new_smmu; - struct tegra241_vintf *vintf; void __iomem *base; u32 regval; - int lidx; int ret; static_assert(offsetof(struct tegra241_cmdqv, smmu) == 0); @@ -815,26 +862,6 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, ida_init(&cmdqv->vintf_ids); - vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); - if (!vintf) - goto destroy_ids; - - /* Init VINTF0 for in-kernel use */ - ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); - if (ret) { - dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); - goto free_vintf; - } - - /* Preallocate logical VCMDQs to VINTF0 */ - for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) { - struct tegra241_vcmdq *vcmdq; - - vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); - if (IS_ERR(vcmdq)) - goto free_lvcmdq; - } - #ifdef CONFIG_IOMMU_DEBUGFS if (!cmdqv_debugfs_dir) { cmdqv_debugfs_dir = @@ -844,19 +871,11 @@ __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, } #endif - new_smmu->impl_ops = &tegra241_cmdqv_impl_ops; + /* Provide init-level ops only, until tegra241_cmdqv_init_structures */ + new_smmu->impl_ops = &init_ops; return new_smmu; -free_lvcmdq: - for (lidx--; lidx >= 0; lidx--) - tegra241_vintf_free_lvcmdq(vintf, lidx); - tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); -free_vintf: - kfree(vintf); -destroy_ids: - ida_destroy(&cmdqv->vintf_ids); - kfree(cmdqv->vintfs); free_irq: if (cmdqv->irq > 0) free_irq(cmdqv->irq, cmdqv); From 78730786afb9dd9773212e60c761cbf7003ce2db Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 21 Oct 2024 16:08:46 -0700 Subject: [PATCH 15/28] iommu/tegra241-cmdqv: Staticize cmdqv_debugfs_dir Fix a sparse warning. Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202410172003.bRQEReTc-lkp@intel.com/ Change-Id: Icbd7e06c479e96f6b813021fb3eaf07fb0ae899e Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20241021230847.811218-1-nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435442 Reviewed-by: Pritesh Raithatha GVS: buildbot_gerritrpt --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index fcd13d301fff..a243c543598c 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -800,7 +800,7 @@ out_fallback: return 0; } -struct dentry *cmdqv_debugfs_dir; +static struct dentry *cmdqv_debugfs_dir; static struct arm_smmu_device * __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, From e3a532d6e48b8a293ba2de18e6cd3e3863473493 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 29 Oct 2024 15:58:24 +0000 Subject: [PATCH 16/28] iommu/tegra241-cmdqv: Fix unused variable warning While testing some io-pgtable changes, I ran into a compiler warning from the Tegra CMDQ driver: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:803:23: warning: unused variable 'cmdqv_debugfs_dir' [-Wunused-variable] 803 | static struct dentry *cmdqv_debugfs_dir; | ^~~~~~~~~~~~~~~~~ 1 warning generated. Guard the variable declaration with CONFIG_IOMMU_DEBUGFS to silence the warning. Change-Id: I3699eaccd846f23418c9d5cae609ffd67429a76e Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435443 Reviewed-by: Pritesh Raithatha GVS: buildbot_gerritrpt --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index a243c543598c..6c7770e79af6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -800,7 +800,9 @@ out_fallback: return 0; } +#ifdef CONFIG_IOMMU_DEBUGFS static struct dentry *cmdqv_debugfs_dir; +#endif static struct arm_smmu_device * __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res, From f0d493630ec76d6c14e0c9c4cc0e3a43a2f2b146 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Sun, 10 Nov 2024 19:02:26 -0800 Subject: [PATCH 17/28] iommu/tegra241-cmdqv: Fix alignment failure at max_n_shift When configuring a kernel with PAGE_SIZE=4KB, depending on its setting of CONFIG_CMA_ALIGNMENT, VCMDQ_LOG2SIZE_MAX=19 could fail the alignment test and trigger a WARN_ON: WARNING: at drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3646 Call trace: arm_smmu_init_one_queue+0x15c/0x210 tegra241_cmdqv_init_structures+0x114/0x338 arm_smmu_device_probe+0xb48/0x1d90 Fix it by capping max_n_shift to CMDQ_MAX_SZ_SHIFT as SMMUv3 CMDQ does. Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV") Change-Id: I7136ff56bcb624a2f53503ecbb1f494ee90f681d Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20241111030226.1940737-1-nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435444 Reviewed-by: Pritesh Raithatha GVS: buildbot_gerritrpt --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 6c7770e79af6..c8ec74f089f3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -509,7 +509,8 @@ static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) snprintf(name, 16, "vcmdq%u", vcmdq->idx); - q->llq.max_n_shift = VCMDQ_LOG2SIZE_MAX; + /* Queue size, capped to ensure natural alignment */ + q->llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, VCMDQ_LOG2SIZE_MAX); /* Use the common helper to init the VCMDQ, and then... */ ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0, From 6e8b7fa76763e980633834e8612aadcae3ba83f1 Mon Sep 17 00:00:00 2001 From: "Luis Claudio R. Goncalves" Date: Fri, 6 Dec 2024 10:01:14 -0300 Subject: [PATCH 18/28] iommu/tegra241-cmdqv: do not use smp_processor_id in preemptible context During boot some of the calls to tegra241_cmdqv_get_cmdq() will happen in preemptible context. As this function calls smp_processor_id(), if CONFIG_DEBUG_PREEMPT is enabled, these calls will trigger a series of "BUG: using smp_processor_id() in preemptible" backtraces. As tegra241_cmdqv_get_cmdq() only calls smp_processor_id() to use the CPU number as a factor to balance out traffic on cmdq usage, it is safe to use raw_smp_processor_id() here. Cc: Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV") Change-Id: I170a6c2f6846d75228750bca5ecd8e5efd90f231 Signed-off-by: Luis Claudio R. Goncalves Reviewed-by: Jason Gunthorpe Reviewed-by: Nicolin Chen Tested-by: Nicolin Chen Link: https://lore.kernel.org/r/Z1L1mja3nXzsJ0Pk@uudg.org Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435445 GVS: buildbot_gerritrpt Reviewed-by: Pritesh Raithatha --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index c8ec74f089f3..6e41ddaa24d6 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -339,7 +339,7 @@ tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu, * one CPU at a time can enter the process, while the others * will be spinning at the same lock. */ - lidx = smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf; + lidx = raw_smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf; vcmdq = vintf->lvcmdqs[lidx]; if (!vcmdq || !READ_ONCE(vcmdq->enabled)) return NULL; From 873bfe47e305752e37bb25af281f9b96d19da6d1 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Wed, 18 Dec 2024 21:14:21 -0800 Subject: [PATCH 19/28] iommu/tegra241-cmdqv: Read SMMU IDR1.CMDQS instead of hardcoding The hardware limitation "max=19" actually comes from SMMU Command Queue. So, it'd be more natural for tegra241-cmdqv driver to read it out rather than hardcoding it itself. This is not an issue yet for a kernel on a baremetal system, but a guest kernel setting the queue base/size in form of IPA/gPA might result in a noncontiguous queue in the physical address space, if underlying physical pages backing up the guest RAM aren't contiguous entirely: e.g. 2MB-page backed guest RAM cannot guarantee a contiguous queue if it is 8MB (capped to VCMDQ_LOG2SIZE_MAX=19). This might lead to command errors when HW does linear-read from a noncontiguous queue memory. Adding this extra IDR1.CMDQS cap (in the guest kernel) allows VMM to set SMMU's IDR1.CMDQS=17 for the case mentioned above, so a guest-level queue will be capped to maximum 2MB, ensuring a contiguous queue memory. Fixes: a3799717b881 ("iommu/tegra241-cmdqv: Fix alignment failure at max_n_shift") Reported-by: Ian Kalinowski Cc: stable@vger.kernel.org Change-Id: I3de2a6e757001ef6bb797ab4368f808550a87d06 Signed-off-by: Nicolin Chen Link: https://lore.kernel.org/r/20241219051421.1850267-1-nicolinc@nvidia.com Signed-off-by: Will Deacon Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435446 GVS: buildbot_gerritrpt Reviewed-by: Pritesh Raithatha --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 6e41ddaa24d6..d525ab43a4ae 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -79,7 +79,6 @@ #define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q)) #define VCMDQ_ADDR GENMASK(47, 5) #define VCMDQ_LOG2SIZE GENMASK(4, 0) -#define VCMDQ_LOG2SIZE_MAX 19 #define TEGRA241_VCMDQ_BASE 0x00000 #define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008 @@ -505,12 +504,15 @@ static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq; struct arm_smmu_queue *q = &cmdq->q; char name[16]; + u32 regval; int ret; snprintf(name, 16, "vcmdq%u", vcmdq->idx); - /* Queue size, capped to ensure natural alignment */ - q->llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, VCMDQ_LOG2SIZE_MAX); + /* Cap queue size to SMMU's IDR1.CMDQS and ensure natural alignment */ + regval = readl_relaxed(smmu->base + ARM_SMMU_IDR1); + q->llq.max_n_shift = + min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, regval)); /* Use the common helper to init the VCMDQ, and then... */ ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0, From fee12cf9e301863a1a4421e8f81204f4082d25cc Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Mon, 7 Apr 2025 13:19:08 -0700 Subject: [PATCH 20/28] iommu/tegra241-cmdqv: Fix warnings due to dmam_free_coherent() Two WARNINGs are observed when SMMU driver rolls back upon failure: arm-smmu-v3.9.auto: Failed to register iommu arm-smmu-v3.9.auto: probe with driver arm-smmu-v3 failed with error -22 ------------[ cut here ]------------ WARNING: CPU: 5 PID: 1 at kernel/dma/mapping.c:74 dmam_free_coherent+0xc0/0xd8 Call trace: dmam_free_coherent+0xc0/0xd8 (P) tegra241_vintf_free_lvcmdq+0x74/0x188 tegra241_cmdqv_remove_vintf+0x60/0x148 tegra241_cmdqv_remove+0x48/0xc8 arm_smmu_impl_remove+0x28/0x60 devm_action_release+0x1c/0x40 ------------[ cut here ]------------ 128 pages are still in use! WARNING: CPU: 16 PID: 1 at mm/page_alloc.c:6902 free_contig_range+0x18c/0x1c8 Call trace: free_contig_range+0x18c/0x1c8 (P) cma_release+0x154/0x2f0 dma_free_contiguous+0x38/0xa0 dma_direct_free+0x10c/0x248 dma_free_attrs+0x100/0x290 dmam_free_coherent+0x78/0xd8 tegra241_vintf_free_lvcmdq+0x74/0x160 tegra241_cmdqv_remove+0x98/0x198 arm_smmu_impl_remove+0x28/0x60 devm_action_release+0x1c/0x40 This is because the LVCMDQ queue memory are managed by devres, while that dmam_free_coherent() is called in the context of devm_action_release(). Jason pointed out that "arm_smmu_impl_probe() has mis-ordered the devres callbacks if ops->device_remove() is going to be manually freeing things that probe allocated": https://lore.kernel.org/linux-iommu/20250407174408.GB1722458@nvidia.com/ In fact, tegra241_cmdqv_init_structures() only allocates memory resources which means any failure that it generates would be similar to -ENOMEM, so there is no point in having that "falling back to standard SMMU" routine, as the standard SMMU would likely fail to allocate memory too. Remove the unwind part in tegra241_cmdqv_init_structures(), and return a proper error code to ask SMMU driver to call tegra241_cmdqv_remove() via impl_ops->device_remove(). Then, drop tegra241_vintf_free_lvcmdq() since devres will take care of that. Fixes: 483e0bd8883a ("iommu/tegra241-cmdqv: Do not allocate vcmdq until dma_set_mask_and_coherent") Cc: stable@vger.kernel.org Suggested-by: Jason Gunthorpe Change-Id: I4458d5b155a1a3844c0004e6bfe2863b7f967220 Signed-off-by: Nicolin Chen Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20250407201908.172225-1-nicolinc@nvidia.com Signed-off-by: Joerg Roedel Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435447 GVS: buildbot_gerritrpt Reviewed-by: Pritesh Raithatha --- .../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 32 +++---------------- 1 file changed, 5 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index d525ab43a4ae..dd7d030d2e89 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -487,17 +487,6 @@ static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu) /* VCMDQ Resource Helpers */ -static void tegra241_vcmdq_free_smmu_cmdq(struct tegra241_vcmdq *vcmdq) -{ - struct arm_smmu_queue *q = &vcmdq->cmdq.q; - size_t nents = 1 << q->llq.max_n_shift; - size_t qsz = nents << CMDQ_ENT_SZ_SHIFT; - - if (!q->base) - return; - dmam_free_coherent(vcmdq->cmdqv->smmu.dev, qsz, q->base, q->base_dma); -} - static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq) { struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu; @@ -560,7 +549,8 @@ static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx) struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx]; char header[64]; - tegra241_vcmdq_free_smmu_cmdq(vcmdq); + /* Note that the lvcmdq queue memory space is managed by devres */ + tegra241_vintf_deinit_lvcmdq(vintf, lidx); dev_dbg(vintf->cmdqv->dev, @@ -768,13 +758,13 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) vintf = kzalloc(sizeof(*vintf), GFP_KERNEL); if (!vintf) - goto out_fallback; + return -ENOMEM; /* Init VINTF0 for in-kernel use */ ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf); if (ret) { dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret); - goto free_vintf; + return ret; } /* Preallocate logical VCMDQs to VINTF0 */ @@ -783,24 +773,12 @@ static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx); if (IS_ERR(vcmdq)) - goto free_lvcmdq; + return PTR_ERR(vcmdq); } /* Now, we are ready to run all the impl ops */ smmu->impl_ops = &tegra241_cmdqv_impl_ops; return 0; - -free_lvcmdq: - for (lidx--; lidx >= 0; lidx--) - tegra241_vintf_free_lvcmdq(vintf, lidx); - tegra241_cmdqv_deinit_vintf(cmdqv, vintf->idx); -free_vintf: - kfree(vintf); -out_fallback: - dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n"); - smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV; - tegra241_cmdqv_remove(smmu); - return 0; } #ifdef CONFIG_IOMMU_DEBUGFS From 56a7b340cd016c03daa212b5f343a76b3d6734b9 Mon Sep 17 00:00:00 2001 From: Pritesh Raithatha Date: Thu, 17 Apr 2025 03:04:36 -0700 Subject: [PATCH 21/28] [DOWNSTREAM]iommu/arm-smmu-v3: use reserved memory for allocations CMA allocations are reusable. When not allocated, it can be used for temporary allocations. When there is allocation request, temporary allocations will be reclaimed and that takes time. SMMU uses CMA allocations and causing boot time increase. To avoid this, add reserved memory pool and use for SMMU allocations instead of CMA. With reserved memory 1MB allocation time is reduced from 17999343ns to 166037ns. Bug 5115195 Change-Id: I34febac4235da68027908969b9348cbfd2feffc4 Signed-off-by: Pritesh Raithatha Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3446227 Reviewed-by: svcacv GVS: buildbot_gerritrpt Reviewed-by: Bharat Nihalani Tested-by: Bharat Nihalani --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 77c0cc385295..d33280db0c3a 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2,7 +2,8 @@ /* * IOMMU API for ARM architected SMMUv3 implementations. * - * Copyright (C) 2015 ARM Limited + * SPDX-FileCopyrightText: Copyright (C) 2015 ARM Limited + * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. All rights reserved. * * Author: Will Deacon * @@ -23,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -3814,6 +3816,9 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, u32 cells; int ret = -EINVAL; + if (!of_reserved_mem_device_init(dev)) + dev_info(dev, "using device-specific reserved memory\n"); + if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells)) dev_err(dev, "missing #iommu-cells property\n"); else if (cells != 1) From a0eab0226d01593e0f4d6bbe1a94abeebb0d0373 Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Fri, 5 Sep 2025 13:06:54 +0000 Subject: [PATCH 22/28] Revert "NVIDIA: SAUCE: iommu/arm-smmu-v3: add suspend/resume support" There is other similar commit added for suspend/resume on K6.1. Using that commit e6edc95c25dc52fcebf985206ce61fbf817abc98 This reverts commit be979fd7a1886517c0f4d1374fdb02130d637e09. Bug 5506739 Change-Id: I32d88bc63d9f94d4eb6efdac298e7c2932b7b6e3 Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3449096 Reviewed-by: Pritesh Raithatha GVS: buildbot_gerritrpt --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 86 +++++++-------------- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 - 2 files changed, 29 insertions(+), 59 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index d33280db0c3a..3655f4114654 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3196,6 +3196,15 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) int ret, nvec = ARM_SMMU_MAX_MSIS; struct device *dev = smmu->dev; + /* Clear the MSI address regs */ + writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); + writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); + + if (smmu->features & ARM_SMMU_FEAT_PRI) + writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); + else + nvec--; + if (!(smmu->features & ARM_SMMU_FEAT_MSI)) return; @@ -3204,9 +3213,6 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu) return; } - if (!(smmu->features & ARM_SMMU_FEAT_PRI)) - nvec--; - /* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */ ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg); if (ret) { @@ -3268,9 +3274,9 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu) } } -static int arm_smmu_reset_irqs(struct arm_smmu_device *smmu) +static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) { - int ret; + int ret, irq; u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN; /* Disable IRQs first */ @@ -3281,35 +3287,7 @@ static int arm_smmu_reset_irqs(struct arm_smmu_device *smmu) return ret; } - if (!smmu->combined_irq) { - /* - * Clear the MSI address regs. These registers will be reset - * in arm_smmu_write_msi_msg callback function by irq_domain - * upon a new MSI message. - */ - writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0); - writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0); - - if (smmu->features & ARM_SMMU_FEAT_PRI) - writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0); - } - - if (smmu->features & ARM_SMMU_FEAT_PRI) - irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; - - /* Enable interrupt generation on the SMMU */ - ret = arm_smmu_write_reg_sync(smmu, irqen_flags, - ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK); - if (ret) - dev_warn(smmu->dev, "failed to enable irqs\n"); - - return ret; -} - -static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) -{ - int ret = 0, irq = smmu->combined_irq; - + irq = smmu->combined_irq; if (irq) { /* * Cavium ThunderX2 implementation doesn't support unique irq @@ -3325,7 +3303,16 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu) } else arm_smmu_setup_unique_irqs(smmu); - return ret; + if (smmu->features & ARM_SMMU_FEAT_PRI) + irqen_flags |= IRQ_CTRL_PRIQ_IRQEN; + + /* Enable interrupt generation on the SMMU */ + ret = arm_smmu_write_reg_sync(smmu, irqen_flags, + ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK); + if (ret) + dev_warn(smmu->dev, "failed to enable irqs\n"); + + return 0; } static int arm_smmu_device_disable(struct arm_smmu_device *smmu) @@ -3339,7 +3326,7 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu) return ret; } -static int arm_smmu_device_reset(struct arm_smmu_device *smmu) +static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) { int ret; u32 reg, enables; @@ -3447,9 +3434,9 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu) } } - ret = arm_smmu_reset_irqs(smmu); + ret = arm_smmu_setup_irqs(smmu); if (ret) { - dev_err(smmu->dev, "failed to reset irqs\n"); + dev_err(smmu->dev, "failed to setup irqs\n"); return ret; } @@ -3457,7 +3444,7 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu) enables &= ~(CR0_EVTQEN | CR0_PRIQEN); /* Enable the SMMU interface, or ensure bypass */ - if (!smmu->bypass || disable_bypass) { + if (!bypass || disable_bypass) { enables |= CR0_SMMUEN; } else { ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT); @@ -3920,6 +3907,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) resource_size_t ioaddr; struct arm_smmu_device *smmu; struct device *dev = &pdev->dev; + bool bypass; smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); if (!smmu) @@ -3934,7 +3922,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) return ret; } /* Set bypass mode according to firmware probing result */ - smmu->bypass = !!ret; + bypass = !!ret; smmu = arm_smmu_impl_probe(smmu); if (IS_ERR(smmu)) @@ -4001,12 +3989,8 @@ static int arm_smmu_device_probe(struct platform_device *pdev) /* Check for RMRs and install bypass STEs if any */ arm_smmu_rmr_install_bypass_ste(smmu); - ret = arm_smmu_setup_irqs(smmu); - if (ret) - return ret; - /* Reset the device */ - ret = arm_smmu_device_reset(smmu); + ret = arm_smmu_device_reset(smmu, bypass); if (ret) goto err_disable; @@ -4071,22 +4055,10 @@ static void arm_smmu_driver_unregister(struct platform_driver *drv) platform_driver_unregister(drv); } -static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) -{ - struct arm_smmu_device *smmu = dev_get_drvdata(dev); - - return arm_smmu_device_reset(smmu); -} - -static const struct dev_pm_ops arm_smmu_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(NULL, arm_smmu_runtime_resume) -}; - static struct platform_driver arm_smmu_driver = { .driver = { .name = "arm-smmu-v3", .of_match_table = arm_smmu_of_match, - .pm = &arm_smmu_pm_ops, .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 1bd34ee6ef7e..d047d0527c5d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -707,8 +707,6 @@ struct arm_smmu_device { struct rb_root streams; struct mutex streams_mutex; - - bool bypass; }; struct arm_smmu_stream { From 09c630f285c112b1564c71b4ad67ed6f2e491ee6 Mon Sep 17 00:00:00 2001 From: Pritesh Raithatha Date: Mon, 1 Apr 2024 13:54:08 +0000 Subject: [PATCH 23/28] [UPSTREAM PENDING] iommu/arm-smmu-v3: add suspend/resume support Add suspend/resume support for arm-smmu-v3. Move irq initialization to probe and re-use the reset function for restoring registers in resume. Bug 4267541 Change-Id: I7bf410f0b69b56f1e1c138e9802449bcd3634a1e Signed-off-by: Pritesh Raithatha Reviewed-on: https://git-master.nvidia.com/r/c/linux-stable/+/3112738 Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3447965 Tested-by: Ashish Mhetre GVS: buildbot_gerritrpt --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 31 +++++++++++++++------ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 2 ++ 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 3655f4114654..1471dbfbcd7b 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3434,12 +3434,6 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) } } - ret = arm_smmu_setup_irqs(smmu); - if (ret) { - dev_err(smmu->dev, "failed to setup irqs\n"); - return ret; - } - if (is_kdump_kernel()) enables &= ~(CR0_EVTQEN | CR0_PRIQEN); @@ -3907,7 +3901,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev) resource_size_t ioaddr; struct arm_smmu_device *smmu; struct device *dev = &pdev->dev; - bool bypass; smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); if (!smmu) @@ -3922,7 +3915,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) return ret; } /* Set bypass mode according to firmware probing result */ - bypass = !!ret; + smmu->bypass = !!ret; smmu = arm_smmu_impl_probe(smmu); if (IS_ERR(smmu)) @@ -3990,10 +3983,16 @@ static int arm_smmu_device_probe(struct platform_device *pdev) arm_smmu_rmr_install_bypass_ste(smmu); /* Reset the device */ - ret = arm_smmu_device_reset(smmu, bypass); + ret = arm_smmu_device_reset(smmu, smmu->bypass); if (ret) goto err_disable; + ret = arm_smmu_setup_irqs(smmu); + if (ret) { + dev_err(smmu->dev, "failed to setup irqs\n"); + return ret; + } + /* And we're up. Go go go! */ ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL, "smmu3.%pa", &ioaddr); @@ -4055,10 +4054,24 @@ static void arm_smmu_driver_unregister(struct platform_driver *drv) platform_driver_unregister(drv); } +static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) +{ + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + + arm_smmu_device_reset(smmu, smmu->bypass); + + return 0; +} + +static const struct dev_pm_ops arm_smmu_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(NULL, arm_smmu_runtime_resume) +}; + static struct platform_driver arm_smmu_driver = { .driver = { .name = "arm-smmu-v3", .of_match_table = arm_smmu_of_match, + .pm = &arm_smmu_pm_ops, .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index d047d0527c5d..b01adaba07e5 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -707,6 +707,8 @@ struct arm_smmu_device { struct rb_root streams; struct mutex streams_mutex; + + bool bypass; }; struct arm_smmu_stream { From a7ae04815bc24cfc5569fd93a56ac58995d96a4f Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Tue, 8 Apr 2025 14:57:44 +0000 Subject: [PATCH 24/28] DOWNSTREAM: iommu/arm-smmu-v3: Add pm suspend op - Issue CMD_OP_CFGI and CMD_OP_TBI_ALL to ensure all pending transactions are complete before going into suspend and then disable SMMU device so that there won't be any new map/unmap requests. - Change to sleep ops to late sleep ops so that SMMU will suspend late after clients and resume early before clients. - Add few debug prints Bug 5117507 Bug 5165373 Change-Id: I108861e6288fd63cebd6d2da2aa93ece071d419f Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3447041 GVS: buildbot_gerritrpt Reviewed-by: svcacv Reviewed-by: Pritesh Raithatha --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 1471dbfbcd7b..185f4ef2deaf 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4058,13 +4058,32 @@ static int __maybe_unused arm_smmu_runtime_resume(struct device *dev) { struct arm_smmu_device *smmu = dev_get_drvdata(dev); + dev_dbg(dev, "Resuming\n"); arm_smmu_device_reset(smmu, smmu->bypass); return 0; } +static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev) +{ + struct arm_smmu_device *smmu = dev_get_drvdata(dev); + struct arm_smmu_cmdq_ent cmd; + + cmd.opcode = CMDQ_OP_CFGI_ALL; + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); + + cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); + + dev_dbg(dev, "Disabling\n"); + arm_smmu_device_disable(smmu); + + dev_dbg(dev, "Suspending\n"); + return 0; +} + static const struct dev_pm_ops arm_smmu_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(NULL, arm_smmu_runtime_resume) + SET_LATE_SYSTEM_SLEEP_PM_OPS(arm_smmu_runtime_suspend, arm_smmu_runtime_resume) }; static struct platform_driver arm_smmu_driver = { From 9cbafdf4c31160740e45a2d8f149c3910a7e85d4 Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Fri, 22 Nov 2024 11:14:14 +0000 Subject: [PATCH 25/28] [UPSTREAM PENDING] iommu/arm-smmu-v3: Add device-tree support in tegra241-cmdqv driver Add support for initialization from device-tree in CMDQV driver required for T264 which mimics the current ACPI probe. Drop ACPI dependency in Kconfig since the inline ifdef would be enough to depend on ACPI Bug 4900238 Change-Id: I4ff0996c9ee0688a0ea795892e2fe59133303658 Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3439897 Reviewed-by: Pritesh Raithatha GVS: buildbot_gerritrpt Reviewed-by: mobile promotions Tested-by: mobile promotions --- drivers/iommu/Kconfig | 2 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 30 +++++++++++++ .../iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 44 ++++++++++++++++++- 3 files changed, 74 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index e0d5960e7753..f58d6fc37d30 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -409,7 +409,7 @@ config ARM_SMMU_V3_SVA config TEGRA241_CMDQV bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3" - depends on ACPI + depends on ARM_SMMU_V3 help Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 185f4ef2deaf..dbf12404145e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3713,6 +3713,34 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) return 0; } +#ifdef CONFIG_TEGRA241_CMDQV +static void tegra_cmdqv_dt_probe(struct device_node *smmu_node, + struct arm_smmu_device *smmu) +{ + struct platform_device *pdev; + struct device_node *np; + + np = of_parse_phandle(smmu_node, "nvidia,cmdqv", 0); + if (!np) + return; + + pdev = of_find_device_by_node(np); + of_node_put(np); + if (!pdev) + return; + + smmu->impl_dev = &pdev->dev; + smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV; + dev_info(smmu->dev, "found companion CMDQV device: %s\n", + dev_name(smmu->impl_dev)); +} +#else +static void tegra_cmdqv_dt_probe(struct device_node *smmu_node, + struct arm_smmu_device *smmu) +{ +} +#endif + #ifdef CONFIG_ACPI #ifdef CONFIG_TEGRA241_CMDQV static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node, @@ -3812,6 +3840,8 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, if (of_dma_is_coherent(dev->of_node)) smmu->features |= ARM_SMMU_FEAT_COHERENCY; + tegra_cmdqv_dt_probe(dev->of_node, smmu); + return ret; } diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index dd7d030d2e89..48618089d1f2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include @@ -748,6 +750,26 @@ free_list: return res; } +static struct resource * +tegra241_cmdqv_find_dt_resource(struct device *dev, int *irq) +{ + struct platform_device *pdev = to_platform_device(dev); + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "no memory resource found for CMDQV\n"); + return NULL; + } + + if (irq) + *irq = platform_get_irq_byname_optional(pdev, "cmdqv"); + if (!irq || *irq <= 0) + dev_warn(dev, "no interrupt. errors will not be reported\n"); + + return res; +} + static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu) { struct tegra241_cmdqv *cmdqv = @@ -875,11 +897,14 @@ struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu) if (!smmu->dev->of_node) res = tegra241_cmdqv_find_acpi_resource(smmu->impl_dev, &irq); + else + res = tegra241_cmdqv_find_dt_resource(smmu->impl_dev, &irq); if (!res) goto out_fallback; new_smmu = __tegra241_cmdqv_probe(smmu, res, irq); - kfree(res); + if (!smmu->dev->of_node) + kfree(res); if (new_smmu) return new_smmu; @@ -890,3 +915,20 @@ out_fallback: put_device(smmu->impl_dev); return ERR_PTR(-ENODEV); } + +static const struct of_device_id tegra241_cmdqv_of_match[] = { + { .compatible = "nvidia,tegra264-cmdqv" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, tegra241_cmdqv_of_match); + +static struct platform_driver tegra241_cmdqv_driver = { + .driver = { + .name = "tegra241-cmdqv", + .of_match_table = tegra241_cmdqv_of_match, + }, +}; +module_platform_driver(tegra241_cmdqv_driver); + +MODULE_DESCRIPTION("NVIDIA Tegra241 Command Queue Virtualization Driver"); +MODULE_LICENSE("GPL v2"); From c16f161cd7fe37428b4f7007b590b8c5670c8d36 Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Tue, 26 Aug 2025 05:24:07 +0000 Subject: [PATCH 26/28] [DOWNSTREAM]: iommu/tegra241-cmdqv: WAR for 64-bit writes on NV HV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NVIDIA’s hypervisor does not support 64-bit writes to consecutive two 32-bit registers e.g., VCMDQ_BASE_LO and VCMDQ_BASE_HI. The driver currently issues a 64-bit write to such registers, which works fine on real hardware but fails under NV HV. This is not a functional bug in the driver, but rather a quirk of the hypervisor which does not fully emulate the HW behavior. Add a workaround to split the write into two 32-bit accesses. Bug 5111712 Change-Id: I6fb6a926a80326e2d7a8a2ec9e475106af843f7c Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3439898 Reviewed-by: Pritesh Raithatha Reviewed-by: Nicolin Chen GVS: buildbot_gerritrpt --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index 48618089d1f2..f97dc8a8fae4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -82,8 +82,10 @@ #define VCMDQ_ADDR GENMASK(47, 5) #define VCMDQ_LOG2SIZE GENMASK(4, 0) -#define TEGRA241_VCMDQ_BASE 0x00000 -#define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008 +#define TEGRA241_VCMDQ_BASE 0x00000 +#define TEGRA241_VCMDQ_BASE_H 0x00004 +#define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008 +#define TEGRA241_VCMDQ_CONS_INDX_BASE_H 0x0000C /* VINTF logical-VCMDQ pages */ #define TEGRA241_VINTFi_PAGE0(i) (TEGRA241_VINTF_PAGE_BASE + SZ_128K*(i)) @@ -367,8 +369,10 @@ static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq) } writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, PROD)); writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, CONS)); - writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE)); - writeq_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, CONS_INDX_BASE)); + writel_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE_H)); + writel_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE)); + writel_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, CONS_INDX_BASE_H)); + writel_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, CONS_INDX_BASE)); gerrorn = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)); gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)); @@ -390,7 +394,8 @@ static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq) tegra241_vcmdq_hw_deinit(vcmdq); /* Configure and enable VCMDQ */ - writeq_relaxed(vcmdq->cmdq.q.q_base, REG_VCMDQ_PAGE1(vcmdq, BASE)); + writel_relaxed(upper_32_bits(vcmdq->cmdq.q.q_base), REG_VCMDQ_PAGE1(vcmdq, BASE_H)); + writel_relaxed(lower_32_bits(vcmdq->cmdq.q.q_base), REG_VCMDQ_PAGE1(vcmdq, BASE)); ret = vcmdq_write_config(vcmdq, VCMDQ_EN); if (ret) { From c79ae7acb377c3d0be9c539a799a485d64a24918 Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Wed, 19 Feb 2025 17:24:37 +0000 Subject: [PATCH 27/28] [DOWNSTREAM] iommu/arm-smmu-v3: Retain prod and cons after resume Set PROD and CONS registers of VCMDQs with retained values from prod and cons variables after SC7 resume. Bug 5117507 Change-Id: Ida34bb04ce669fdc7901fa935e2a2eff806e3d1e Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3447044 Reviewed-by: Pritesh Raithatha GVS: buildbot_gerritrpt --- drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c index f97dc8a8fae4..388a9b891fd9 100644 --- a/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c +++ b/drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c @@ -396,6 +396,8 @@ static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq) /* Configure and enable VCMDQ */ writel_relaxed(upper_32_bits(vcmdq->cmdq.q.q_base), REG_VCMDQ_PAGE1(vcmdq, BASE_H)); writel_relaxed(lower_32_bits(vcmdq->cmdq.q.q_base), REG_VCMDQ_PAGE1(vcmdq, BASE)); + writel_relaxed(vcmdq->cmdq.q.llq.prod, REG_VCMDQ_PAGE0(vcmdq, PROD)); + writel_relaxed(vcmdq->cmdq.q.llq.cons, REG_VCMDQ_PAGE0(vcmdq, CONS)); ret = vcmdq_write_config(vcmdq, VCMDQ_EN); if (ret) { From 5865348c39a2c734e1ab53ee720b7cda27d552aa Mon Sep 17 00:00:00 2001 From: Ashish Mhetre Date: Tue, 16 Sep 2025 14:42:33 +0000 Subject: [PATCH 28/28] DOWNSTREAM: iommu/arm-smmu-v3: Fix arm_smmu_impl_ops While backporting the upstream commit: 44a9231b800b454befac069d468e2f2b71827474 (iommu/arm-smmu-v3: Add struct arm_smmu_impl_ops) it didn't apply correctly causing cmdqv reset not getting invoked during resume. Fix it by moving the device reset in arm_smmu_device_reset as in upstream commit. Bug 5419379 Bug 5111712 Change-Id: Ifc873bccd0b43b4e6890ac5d6672b4ea3e71cc88 Signed-off-by: Ashish Mhetre Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3452819 GVS: buildbot_gerritrpt Reviewed-by: svcacv Reviewed-by: Pritesh Raithatha --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index dbf12404145e..9b8d4a68daee 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -3452,6 +3452,14 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) return ret; } + if (smmu->impl_ops && smmu->impl_ops->device_reset) { + ret = smmu->impl_ops->device_reset(smmu); + if (ret) { + dev_err(smmu->dev, "failed to reset impl\n"); + return ret; + } + } + return 0; } @@ -4035,14 +4043,6 @@ static int arm_smmu_device_probe(struct platform_device *pdev) goto err_free_sysfs; } - if (smmu->impl_ops && smmu->impl_ops->device_reset) { - ret = smmu->impl_ops->device_reset(smmu); - if (ret) { - dev_err(smmu->dev, "failed to reset impl\n"); - return ret; - } - } - return 0; err_free_sysfs: