60 Commits

Author SHA1 Message Date
Ashish Mhetre
a34ad11c0a arm64: configs: Enable CMDQV
Enable CMDQV in defconfigs

Bug 5158829
Bug 5111712
Bug 5419379

Change-Id: I93fdc23c3590091016f1a6f99157a4d1cb2fb638
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3439904
(cherry-picked from commit a9e268cc859f893579b1071d31451f1511cc1e65)
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3495589
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-20 08:13:30 -08:00
Ashish Mhetre
e3677b3425 Merge 'drivers/iommu' from dev-main into rel-38
Relevant changes:
9fc5b88382e6 DOWNSTREAM: iommu/arm-smmu-v3: Fix arm_smmu_impl_ops
75436e3f13e4 [DOWNSTREAM] iommu/arm-smmu-v3: Retain prod and cons after resume
b82c7a458d43 [DOWNSTREAM]: iommu/tegra241-cmdqv: WAR for 64-bit writes on NV HV
61d9b92ef30a [UPSTREAM PENDING] iommu/arm-smmu-v3: Add device-tree support in tegra241-cmdqv driver
d4303ed2cb0d DOWNSTREAM: iommu/arm-smmu-v3: Add pm suspend op
69677a732a7d [UPSTREAM PENDING] iommu/arm-smmu-v3: add suspend/resume support
15a4351b19b6 Revert "NVIDIA: SAUCE: iommu/arm-smmu-v3: add suspend/resume support"
415c7fb8e350 [DOWNSTREAM] iommu/arm-smmu-v3: use reserved memory for allocations
0d90af2451b4 iommu/tegra241-cmdqv: Fix warnings due to dmam_free_coherent()
680d181b9007 iommu/tegra241-cmdqv: Read SMMU IDR1.CMDQS instead of hardcoding
2553592d2c24 iommu/tegra241-cmdqv: do not use smp_processor_id in preemptible context
7f390db97e23 iommu/tegra241-cmdqv: Fix alignment failure at max_n_shift
0695cfe98a97 iommu/tegra241-cmdqv: Fix unused variable warning
b266c532562d iommu/tegra241-cmdqv: Staticize cmdqv_debugfs_dir
d5bdc04d1988 iommu/tegra241-cmdqv: Do not allocate vcmdq until dma_set_mask_and_coherent
0c7291931d8c iommu/tegra241-cmdqv: Drop static at local variable
aa469aa3dfa5 iommu/tegra241-cmdqv: Fix ioremap() error handling in probe()
d639d2b0554b iommu/tegra241-cmdqv: Fix -Wformat-truncation warnings in lvcmdq_error_header
2215e2358dff iommu/tegra241-cmdqv: Limit CMDs for VCMDQs of a guest owned VINTF
eefc77955d2e iommu/arm-smmu-v3: Start a new batch if new command is not supported
f70fe98429c7 iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV
44a9231b800b iommu/arm-smmu-v3: Add struct arm_smmu_impl_ops
b66ffedc113c iommu/arm-smmu-v3: Add acpi_smmu_iort_probe_model for impl
33f689e1cecd iommu/arm-smmu-v3: Add ARM_SMMU_OPT_TEGRA241_CMDQV
fb85f6f304f2 iommu/arm-smmu-v3: Make symbols public for CONFIG_TEGRA241_CMDQV
8e3c873d5ab3 iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_init
4fae6fb536f6 iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_build_sync_cmd
3c255f7d9cbd iommu/arm-smmu-v3: Issue a batch of commands to the same cmdq

Change-Id: I3e28a37c04d9bbfb6e22a1bc51771bc83db11b07
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
2025-11-20 10:25:56 +00:00
Ashish Mhetre
5865348c39 DOWNSTREAM: iommu/arm-smmu-v3: Fix arm_smmu_impl_ops
While backporting the upstream commit:
44a9231b800b454befac069d468e2f2b71827474
(iommu/arm-smmu-v3: Add struct arm_smmu_impl_ops)

it didn't apply cleanly, causing the CMDQV reset to not be invoked
during resume. Fix it by moving the device reset into
arm_smmu_device_reset(), as in the upstream commit.

Bug 5419379
Bug 5111712

Change-Id: Ifc873bccd0b43b4e6890ac5d6672b4ea3e71cc88
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3452819
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:55 +00:00
Ashish Mhetre
c79ae7acb3 [DOWNSTREAM] iommu/arm-smmu-v3: Retain prod and cons after resume
Set the PROD and CONS registers of the VCMDQs with the values retained
in the prod and cons variables after SC7 resume.
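
A sketch of the restore step, assuming the standard arm_smmu_queue
fields (not the verbatim patch):

    /* On SC7 resume, program the hardware pointers back from the
     * shadow values retained across suspend.
     */
    writel_relaxed(q->llq.prod, q->prod_reg);
    writel_relaxed(q->llq.cons, q->cons_reg);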

Bug 5117507

Change-Id: Ida34bb04ce669fdc7901fa935e2a2eff806e3d1e
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3447044
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-20 10:22:55 +00:00
Ashish Mhetre
c16f161cd7 [DOWNSTREAM]: iommu/tegra241-cmdqv: WAR for 64-bit writes on NV HV
NVIDIA's hypervisor does not support 64-bit writes to two consecutive
32-bit registers, e.g., VCMDQ_BASE_LO and VCMDQ_BASE_HI. The driver
currently issues a 64-bit write to such registers, which works fine on
real hardware but fails under NV HV.
This is not a functional bug in the driver, but rather a quirk of the
hypervisor, which does not fully emulate the HW behavior. Add a
workaround to split the write into two 32-bit accesses.
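
A minimal sketch of such a split write (hypothetical helper name; the
actual patch may differ):

    /* WAR: one 64-bit MMIO write becomes two 32-bit writes that
     * the hypervisor can emulate individually.
     */
    static void cmdqv_writeq_war(u64 val, void __iomem *addr)
    {
            writel_relaxed(lower_32_bits(val), addr);     /* _LO */
            writel_relaxed(upper_32_bits(val), addr + 4); /* _HI */
    }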

Bug 5111712

Change-Id: I6fb6a926a80326e2d7a8a2ec9e475106af843f7c
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3439898
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-20 10:22:55 +00:00
Ashish Mhetre
9cbafdf4c3 [UPSTREAM PENDING] iommu/arm-smmu-v3: Add device-tree support in tegra241-cmdqv driver
Add support for initialization from device-tree in the CMDQV driver,
required for T264, mimicking the current ACPI probe.
Drop the ACPI dependency in Kconfig, since the inline ifdef is enough
to depend on ACPI.

Bug 4900238

Change-Id: I4ff0996c9ee0688a0ea795892e2fe59133303658
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3439897
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-11-20 10:22:54 +00:00
Ashish Mhetre
a7ae04815b DOWNSTREAM: iommu/arm-smmu-v3: Add pm suspend op
- Issue CMD_OP_CFGI and CMD_OP_TBI_ALL to ensure all pending
transactions are complete before going into suspend, then disable the
SMMU device so that there won't be any new map/unmap requests.
- Change the sleep ops to late sleep ops so that the SMMU suspends
late, after its clients, and resumes early, before them.
- Add a few debug prints
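
A sketch of the late sleep-op wiring (hypothetical handler names):

    /* LATE ops run after normal sleep ops at suspend and before
     * them at resume, so SMMU clients quiesce first.
     */
    static const struct dev_pm_ops arm_smmu_pm_ops = {
            SET_LATE_SYSTEM_SLEEP_PM_OPS(arm_smmu_suspend,
                                         arm_smmu_resume)
    };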

Bug 5117507
Bug 5165373

Change-Id: I108861e6288fd63cebd6d2da2aa93ece071d419f
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3447041
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:54 +00:00
Pritesh Raithatha
09c630f285 [UPSTREAM PENDING] iommu/arm-smmu-v3: add suspend/resume support
Add suspend/resume support for arm-smmu-v3. Move irq initialization
to probe and re-use the reset function for restoring registers in
resume.

Bug 4267541

Change-Id: I7bf410f0b69b56f1e1c138e9802449bcd3634a1e
Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/linux-stable/+/3112738
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3447965
Tested-by: Ashish Mhetre <amhetre@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-20 10:22:54 +00:00
Ashish Mhetre
a0eab0226d Revert "NVIDIA: SAUCE: iommu/arm-smmu-v3: add suspend/resume support"
There is another similar commit added for suspend/resume on K6.1.
Use that commit e6edc95c25dc52fcebf985206ce61fbf817abc98 instead.

This reverts commit be979fd7a1.

Bug 5506739

Change-Id: I32d88bc63d9f94d4eb6efdac298e7c2932b7b6e3
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3449096
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-20 10:22:54 +00:00
Pritesh Raithatha
56a7b340cd [DOWNSTREAM] iommu/arm-smmu-v3: use reserved memory for allocations
CMA allocations are reusable: while not allocated, the memory can be
used for temporary allocations. When an allocation request arrives,
those temporary allocations must be reclaimed first, and that takes
time.

The SMMU uses CMA allocations, which increases boot time. To avoid
this, add a reserved memory pool and use it for SMMU allocations
instead of CMA.

With reserved memory, the time for a 1MB allocation is reduced from
17999343ns to 166037ns.
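
A sketch of the binding step (error handling trimmed; exact call site
assumed):

    /* Attach the SMMU device to its DT reserved-memory region so
     * coherent DMA allocations come from the dedicated pool.
     */
    ret = of_reserved_mem_device_init(smmu->dev);
    if (ret)
            dev_warn(smmu->dev,
                     "no reserved pool, falling back to CMA\n");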

Bug 5115195

Change-Id: I34febac4235da68027908969b9348cbfd2feffc4
Signed-off-by: Pritesh Raithatha <praithatha@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3446227
Reviewed-by: svcacv <svcacv@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Bharat Nihalani <bnihalani@nvidia.com>
Tested-by: Bharat Nihalani <bnihalani@nvidia.com>
2025-11-20 10:22:54 +00:00
Nicolin Chen
fee12cf9e3 iommu/tegra241-cmdqv: Fix warnings due to dmam_free_coherent()
Two WARNINGs are observed when SMMU driver rolls back upon failure:
 arm-smmu-v3.9.auto: Failed to register iommu
 arm-smmu-v3.9.auto: probe with driver arm-smmu-v3 failed with error -22
 ------------[ cut here ]------------
 WARNING: CPU: 5 PID: 1 at kernel/dma/mapping.c:74 dmam_free_coherent+0xc0/0xd8
 Call trace:
  dmam_free_coherent+0xc0/0xd8 (P)
  tegra241_vintf_free_lvcmdq+0x74/0x188
  tegra241_cmdqv_remove_vintf+0x60/0x148
  tegra241_cmdqv_remove+0x48/0xc8
  arm_smmu_impl_remove+0x28/0x60
  devm_action_release+0x1c/0x40
 ------------[ cut here ]------------
 128 pages are still in use!
 WARNING: CPU: 16 PID: 1 at mm/page_alloc.c:6902 free_contig_range+0x18c/0x1c8
 Call trace:
  free_contig_range+0x18c/0x1c8 (P)
  cma_release+0x154/0x2f0
  dma_free_contiguous+0x38/0xa0
  dma_direct_free+0x10c/0x248
  dma_free_attrs+0x100/0x290
  dmam_free_coherent+0x78/0xd8
  tegra241_vintf_free_lvcmdq+0x74/0x160
  tegra241_cmdqv_remove+0x98/0x198
  arm_smmu_impl_remove+0x28/0x60
  devm_action_release+0x1c/0x40

This is because the LVCMDQ queue memory is managed by devres, while
dmam_free_coherent() is called in the context of devm_action_release().

Jason pointed out that "arm_smmu_impl_probe() has mis-ordered the devres
callbacks if ops->device_remove() is going to be manually freeing things
that probe allocated":
https://lore.kernel.org/linux-iommu/20250407174408.GB1722458@nvidia.com/

In fact, tegra241_cmdqv_init_structures() only allocates memory resources
which means any failure that it generates would be similar to -ENOMEM, so
there is no point in having that "falling back to standard SMMU" routine,
as the standard SMMU would likely fail to allocate memory too.

Remove the unwind part in tegra241_cmdqv_init_structures(), and return a
proper error code to ask SMMU driver to call tegra241_cmdqv_remove() via
impl_ops->device_remove(). Then, drop tegra241_vintf_free_lvcmdq() since
devres will take care of that.

Fixes: 483e0bd8883a ("iommu/tegra241-cmdqv: Do not allocate vcmdq until dma_set_mask_and_coherent")
Cc: stable@vger.kernel.org
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Change-Id: I4458d5b155a1a3844c0004e6bfe2863b7f967220
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20250407201908.172225-1-nicolinc@nvidia.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435447
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:53 +00:00
Nicolin Chen
873bfe47e3 iommu/tegra241-cmdqv: Read SMMU IDR1.CMDQS instead of hardcoding
The hardware limitation "max=19" actually comes from the SMMU Command
Queue. So, it'd be more natural for the tegra241-cmdqv driver to read
it out rather than hardcoding it itself.

This is not an issue yet for a kernel on a bare-metal system, but a
guest kernel setting the queue base/size in the form of IPA/gPA might
end up with a noncontiguous queue in the physical address space if the
underlying physical pages backing the guest RAM aren't entirely
contiguous: e.g., 2MB-page backed guest RAM cannot guarantee a
contiguous queue if it is 8MB (capped to VCMDQ_LOG2SIZE_MAX=19). This
might lead to command errors when HW does a linear read from
noncontiguous queue memory.

Adding this extra IDR1.CMDQS cap (in the guest kernel) allows the VMM
to set the SMMU's IDR1.CMDQS=17 for the case mentioned above, so a
guest-level queue will be capped to a maximum of 2MB, ensuring
contiguous queue memory.
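
A sketch of the read-out (field names as in arm-smmu-v3.h; exact use
in the driver assumed):

    /* Honor the SMMU's advertised CMDQ size instead of assuming
     * the VCMDQ_LOG2SIZE_MAX hardware limit.
     */
    u32 regval = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
    u32 log2size = min_t(u32, FIELD_GET(IDR1_CMDQS, regval),
                         VCMDQ_LOG2SIZE_MAX);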

Fixes: a3799717b881 ("iommu/tegra241-cmdqv: Fix alignment failure at max_n_shift")
Reported-by: Ian Kalinowski <ikalinowski@nvidia.com>
Cc: stable@vger.kernel.org
Change-Id: I3de2a6e757001ef6bb797ab4368f808550a87d06
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/20241219051421.1850267-1-nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435446
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:53 +00:00
Luis Claudio R. Goncalves
6e8b7fa767 iommu/tegra241-cmdqv: do not use smp_processor_id in preemptible context
During boot some of the calls to tegra241_cmdqv_get_cmdq() will happen
in preemptible context. As this function calls smp_processor_id(), if
CONFIG_DEBUG_PREEMPT is enabled, these calls will trigger a series of
"BUG: using smp_processor_id() in preemptible" backtraces.

As tegra241_cmdqv_get_cmdq() only calls smp_processor_id() to use the
CPU number as a factor to balance out traffic on cmdq usage, it is safe
to use raw_smp_processor_id() here.
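
The change itself is a one-liner along these lines (queue-selection
expression assumed from the description):

    /* The CPU number only spreads traffic across VCMDQs, so a
     * stale value after migration is harmless;
     * raw_smp_processor_id() skips the preemption check.
     */
    u16 qidx = raw_smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf;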

Cc: <stable@vger.kernel.org>
Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV")
Change-Id: I170a6c2f6846d75228750bca5ecd8e5efd90f231
Signed-off-by: Luis Claudio R. Goncalves <lgoncalv@redhat.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/Z1L1mja3nXzsJ0Pk@uudg.org
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435445
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:53 +00:00
Nicolin Chen
f0d493630e iommu/tegra241-cmdqv: Fix alignment failure at max_n_shift
When configuring a kernel with PAGE_SIZE=4KB, depending on its setting of
CONFIG_CMA_ALIGNMENT, VCMDQ_LOG2SIZE_MAX=19 could fail the alignment test
and trigger a WARN_ON:
    WARNING: at drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c:3646
    Call trace:
     arm_smmu_init_one_queue+0x15c/0x210
     tegra241_cmdqv_init_structures+0x114/0x338
     arm_smmu_device_probe+0xb48/0x1d90

Fix it by capping max_n_shift to CMDQ_MAX_SZ_SHIFT as SMMUv3 CMDQ does.
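
A sketch of the cap (surrounding code assumed):

    /* Cap the queue size as the standard SMMUv3 CMDQ does, so the
     * allocation also satisfies the CMA alignment requirement.
     */
    q->llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
                               VCMDQ_LOG2SIZE_MAX);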

Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV")
Change-Id: I7136ff56bcb624a2f53503ecbb1f494ee90f681d
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/20241111030226.1940737-1-nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435444
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-20 10:22:53 +00:00
Will Deacon
e3a532d6e4 iommu/tegra241-cmdqv: Fix unused variable warning
While testing some io-pgtable changes, I ran into a compiler warning
from the Tegra CMDQ driver:

  drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:803:23: warning: unused variable 'cmdqv_debugfs_dir' [-Wunused-variable]
    803 | static struct dentry *cmdqv_debugfs_dir;
        |                       ^~~~~~~~~~~~~~~~~
  1 warning generated.

Guard the variable declaration with CONFIG_IOMMU_DEBUGFS to silence the
warning.
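
The guard looks like this:

    #ifdef CONFIG_IOMMU_DEBUGFS
    static struct dentry *cmdqv_debugfs_dir;
    #endif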

Change-Id: I3699eaccd846f23418c9d5cae609ffd67429a76e
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435443
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-20 10:22:53 +00:00
Nicolin Chen
78730786af iommu/tegra241-cmdqv: Staticize cmdqv_debugfs_dir
Fix a sparse warning.

Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202410172003.bRQEReTc-lkp@intel.com/
Change-Id: Icbd7e06c479e96f6b813021fb3eaf07fb0ae899e
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20241021230847.811218-1-nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435442
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-20 10:22:53 +00:00
Nicolin Chen
737d405b11 iommu/tegra241-cmdqv: Do not allocate vcmdq until dma_set_mask_and_coherent
It's observed that, when the first 4GB of system memory was reserved, all
VCMDQ allocations failed (even with the smallest qsz in the last attempt):
    arm-smmu-v3: found companion CMDQV device: NVDA200C:00
    arm-smmu-v3: option mask 0x10
    arm-smmu-v3: failed to allocate queue (0x8000 bytes) for vcmdq0
    acpi NVDA200C:00: tegra241_cmdqv: Falling back to standard SMMU CMDQ
    arm-smmu-v3: ias 48-bit, oas 48-bit (features 0x001e1fbf)
    arm-smmu-v3: allocated 524288 entries for cmdq
    arm-smmu-v3: allocated 524288 entries for evtq
    arm-smmu-v3: allocated 524288 entries for priq

This is because the 4GB reserved memory shifted the entire DMA zone
from the lower 32-bit range (used on a system without the 4GB carveout)
to a higher range, while dev->coherent_dma_mask was still set to
DMA_BIT_MASK(32) by default.

The dma_set_mask_and_coherent() call is done in arm_smmu_device_hw_probe()
of the SMMU driver. So any DMA allocation from tegra241_cmdqv_probe() must
wait until the coherent_dma_mask is correctly set.

Move the vintf/vcmdq structure initialization routine into a different op,
"init_structures". Call it at the end of arm_smmu_init_structures(), where
standard SMMU queues get allocated.

Most of the impl_ops aren't ready until the vintf/vcmdq structures are
initialized, so replace the full impl_ops with an init_ops in
__tegra241_cmdqv_probe(), and switch to tegra241_cmdqv_impl_ops later
in arm_smmu_init_structures(). Note that tegra241_cmdqv_impl_ops does
not link to the new init_structures op after this switch, since there
is no point in having it once initialization is done.

Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV")
Reported-by: Matt Ochs <mochs@nvidia.com>
Change-Id: Ie7725620055e79702ce2b1bfb3beef624602ccd1
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/530993c3aafa1b0fc3d879b8119e13c629d12e2b.1725503154.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435441
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-20 10:22:53 +00:00
Nicolin Chen
98f97f6df7 iommu/tegra241-cmdqv: Drop static at local variable
This is likely a typo. Drop it.

Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV")
Change-Id: I903366a8fa224ae3542dd1b592c8e51be9c4cb90
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/13fd3accb5b7ed6ec11cc6b7435f79f84af9f45f.1725503154.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435440
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:53 +00:00
Dan Carpenter
b9be69606e iommu/tegra241-cmdqv: Fix ioremap() error handling in probe()
The ioremap() function doesn't return error pointers, it returns NULL
on error so update the error handling.  Also just return directly
instead of calling iounmap() on the NULL pointer.  Calling
iounmap(NULL) doesn't cause a problem on ARM but on other architectures
it can trigger a warning so it'a bad habbit.
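
The corrected pattern, roughly (surrounding structure approximated):

    /* ioremap() yields NULL on failure, never an ERR_PTR() */
    cmdqv->base = ioremap(res->start, resource_size(res));
    if (!cmdqv->base)
            return NULL;    /* return directly; no iounmap(NULL) */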

Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV")
Change-Id: I2ffed2cf23db0632c47f667a4b35584c4d181b7a
Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/5a6c1e9a-0724-41b1-86d4-36335d3768ea@stanley.mountain
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435439
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:53 +00:00
Nicolin Chen
3c1a5006e3 iommu/tegra241-cmdqv: Fix -Wformat-truncation warnings in lvcmdq_error_header
Kernel test robot reported a few truncation warnings at the snprintf:
drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:
	In function ‘tegra241_vintf_free_lvcmdq’:
drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:239:56:
	warning: ‘%u’ directive output may be truncated writing between 1 and
	5 bytes into a region of size between 3 and 11 [-Wformat-truncation=]
  239 |         snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ",
      |                                                        ^~
drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:239:32: note: directive argument
	in the range [0, 65535]
  239 |         snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ",
      |                                ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c:239:9: note: ‘snprintf’ output
	between 25 and 37 bytes into a destination of size 32
  239 |         snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ",
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  240 |                  vcmdq->vintf->idx, vcmdq->idx, vcmdq->lidx);

Fix by bumping up the size of the header to hold more characters.
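
i.e., roughly (sizes per the warning above):

    /* 64 bytes comfortably fits three 16-bit indices in the
     * "VINTF%u: VCMDQ%u/LVCMDQ%u: " format.
     */
    char header[64];

    snprintf(header, sizeof(header), "VINTF%u: VCMDQ%u/LVCMDQ%u: ",
             vcmdq->vintf->idx, vcmdq->idx, vcmdq->lidx);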

Fixes: 918eb5c856f6 ("iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV")
Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202409020406.7ed5uojF-lkp@intel.com/
Change-Id: I4329a30cc73b2333eaa6812a5c5e4cd52619549a
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/20240902055745.629456-1-nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435438
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-20 10:22:53 +00:00
Nicolin Chen
009e96a7b5 iommu/tegra241-cmdqv: Limit CMDs for VCMDQs of a guest owned VINTF
When VCMDQs are assigned to a VINTF owned by a guest (HYP_OWN bit unset),
only TLB and ATC invalidation commands are supported by the VCMDQ HW. So,
implement the new cmdq->supports_cmd op to scan the input cmd in order to
make sure that it is supported by the selected queue.

Note that the guest VM shouldn't have HYP_OWN bit being set regardless of
guest kernel driver writing it or not, i.e. the hypervisor running in the
host OS should wire this bit to zero when trapping a write access to this
VINTF_CONFIG register from a guest kernel.
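
An abridged sketch of such a scan hook (opcode list shortened; the
actual patch may accept more commands):

    static bool tegra241_guest_vcmdq_supports_cmd(u64 *cmd)
    {
            u8 opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);

            /* Guest-owned VINTF: only TLB/ATC invalidations */
            return opcode == CMDQ_OP_TLBI_NH_ASID ||
                   opcode == CMDQ_OP_TLBI_NH_VA ||
                   opcode == CMDQ_OP_ATC_INV;
    }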

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Change-Id: Ic3375566b85f6f75796d23a483897f4e2c83c7fa
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/8160292337059b91271045800e5c62f7295e2c24.1724970714.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435437
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:53 +00:00
Nicolin Chen
c6f6edb534 iommu/arm-smmu-v3: Start a new batch if new command is not supported
The VCMDQ in the tegra241-cmdqv driver has a guest mode that supports only
a few invalidation commands. A batch is initialized with a cmdq, so it has
to confirm whether a new command is supported or not.

Add a supports_cmd function pointer to the cmdq structure, where the vcmdq
driver should hook a command scan function. Add an inline helper too so it
can be used by both sides.

If a new command is not supported, simply issue the existing batch and re-
init it as a new batch.
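
A sketch of the batch_add path (helper names assumed from the
description):

    /* Flush and restart the batch when the chosen cmdq cannot
     * accept the new command.
     */
    if (!arm_smmu_cmdq_supports_cmd(cmds->cmdq, ent)) {
            arm_smmu_cmdq_batch_submit(smmu, cmds);
            arm_smmu_cmdq_batch_init(smmu, cmds, ent);
    }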

Change-Id: I0b8890b4e0c2df2a2824a3950ee23bb885167a40
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/aafb24b881504f18c5d0c7c15f2134e40ad2c486.1724970714.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435436
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:52 +00:00
Nate Watterson
2e2b4c777b iommu/arm-smmu-v3: Add in-kernel support for NVIDIA Tegra241 (Grace) CMDQV
NVIDIA's Tegra241 SoC has CMDQ-Virtualization (CMDQV) hardware,
extending the standard ARM SMMU v3 IP to support multiple VCMDQs with
virtualization capabilities. In terms of command queues, they are very
much like a standard SMMU CMDQ (or ECMDQs), but only support CS_NONE
in the CS field of CMD_SYNC.

Add a new tegra241-cmdqv driver, and insert its structure pointer into the
existing arm_smmu_device, and then add related function calls in the SMMUv3
driver to interact with the CMDQV driver.

In the CMDQV driver, add a minimal part for the in-kernel support: reserve
VINTF0 for in-kernel use, and assign some of the VCMDQs to the VINTF0, and
select one VCMDQ based on the current CPU ID to execute supported commands.
This multi-queue design for in-kernel use gives some limited improvements:
up to 20% reduction of invalidation time was measured by a multi-threaded
DMA unmap benchmark, compared to a single queue.

The other part of the CMDQV driver will be user-space support that gives a
hypervisor running on the host OS to talk to the driver for virtualization
use cases, allowing VMs to use VCMDQs without trappings, i.e. no VM Exits.
This is designed based on IOMMUFD, and its RFC series is also under review.
It will provide a guest OS a bigger improvement: 70% to 90% reductions of
TLB invalidation time were measured by DMA unmap tests running in a guest,
compared to nested SMMU CMDQ (with trappings).

As the initial version, the CMDQV driver only supports ACPI configurations.

Change-Id: Ib51b81f62fa115ce20dc542715493506d4d7f557
Signed-off-by: Nate Watterson <nwatterson@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Co-developed-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/dce50490b2c10b7254fb36aa73ed7ffd812b283a.1724970714.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435435
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:52 +00:00
Ashish Mhetre
8eac48a1ba iommu/arm-smmu-v3: Add struct arm_smmu_impl_ops
Mimicking the arm-smmu (v2) driver, introduce a struct
arm_smmu_impl_ops to accommodate impl routines.
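
An abridged sketch of the hook points (member list approximated; see
the actual header for the full set):

    struct arm_smmu_impl_ops {
            int (*device_reset)(struct arm_smmu_device *smmu);
            void (*device_remove)(struct arm_smmu_device *smmu);
            int (*init_structures)(struct arm_smmu_device *smmu);
            struct arm_smmu_cmdq *(*get_secondary_cmdq)(
                            struct arm_smmu_device *smmu);
    };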

Suggested-by: Will Deacon <will@kernel.org>
Change-Id: I9d5d5e1cf32dcd6c282dc217903d6c424d1417cb
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/8fe9f3805568aabf771fc6706c116459016bf62d.1724970714.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435434
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:52 +00:00
Ashish Mhetre
3f925f4d6b iommu/arm-smmu-v3: Add acpi_smmu_iort_probe_model for impl
For model-specific implementations, repurpose acpi_smmu_get_options()
into a wider acpi_smmu_iort_probe_model(). A new model can be added to
the list in this new function.

Suggested-by: Will Deacon <will@kernel.org>
Change-Id: I6a9912f55cde32a78da1203f1abffc50e59c1438
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/79716299829aeab2e55b8c7932f2634b209bb4d5.1724970714.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435433
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-20 10:22:52 +00:00
Nicolin Chen
5903705a33 iommu/arm-smmu-v3: Add ARM_SMMU_OPT_TEGRA241_CMDQV
The CMDQV extension in NVIDIA Tegra241 SoC only supports CS_NONE in the
CS field of CMD_SYNC. Add a new SMMU option to accommodate that.
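
A sketch of how the option is consumed when building CMD_SYNC:

    /* Tegra241 CMDQV only accepts CS_NONE in CMD_SYNC */
    if (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV)
            cmd[0] |= FIELD_PREP(CMDQ_SYNC_0_CS, CMDQ_SYNC_0_CS_NONE);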

Suggested-by: Will Deacon <will@kernel.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Change-Id: I8767faf8b780474c93c78305e89bd51308f45369
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/a3cb9bb2429fbae4a59f7ef517614d226763d717.1724970714.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435432
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:52 +00:00
Ashish Mhetre
bf155e5de1 iommu/arm-smmu-v3: Make symbols public for CONFIG_TEGRA241_CMDQV
The symbols __arm_smmu_cmdq_skip_err(), arm_smmu_init_one_queue(), and
arm_smmu_cmdq_init() need to be used by the tegra241-cmdqv compilation
unit in a following patch.

Remove the static and put prototypes in the header.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Change-Id: I7b66675548ca8f3ed475d4fdd021d473feac07f2
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/c4f2aa5f5f40a2e7c68b132c6d3171d6403de57a.1724970714.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435431
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:52 +00:00
Nicolin Chen
af82bcacd5 iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_init
So that this function can be used by other cmdqs than &smmu->cmdq only.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Change-Id: I7eb1698fe46941ed326f0c0a11ad28d6ecddddd8
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/e11a3c0bde172c9652c2946f12bc2ceed4c3a355.1724970714.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435430
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-20 10:22:52 +00:00
Nicolin Chen
d6b2a909b9 iommu/arm-smmu-v3: Pass in cmdq pointer to arm_smmu_cmdq_build_sync_cmd
The CMDQV extension on the NVIDIA Tegra241 SoC only supports CS_NONE
in the CS field of CMD_SYNC, unlike the standard SMMU CMDQ. Pass in
the cmdq pointer directly, so the function can identify a different
cmdq implementation.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Change-Id: I3f29960566b7fb75a76046b70ef55594ffbd0e56
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/723288287997b6dfbcd2a904d2c11e9b23f82250.1724970714.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435429
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
2025-11-20 10:22:52 +00:00
Ashish Mhetre
7a77903641 iommu/arm-smmu-v3: Issue a batch of commands to the same cmdq
The driver calls the arm_smmu_get_cmdq() helper in different places,
and that is fine as long as the helper always returns the single SMMU
CMDQ. However, with the NVIDIA CMDQV extension or SMMU ECMDQ, there
can be multiple cmdqs in the system to select from, and either case
requires a batch of commands to be issued to the same cmdq. Thus, the
cmdq has to be decided in the higher-level callers.

Add a cmdq pointer in arm_smmu_cmdq_batch structure, and decide the cmdq
when initializing the batch. Pass its pointer down to the bottom function.
Update __arm_smmu_cmdq_issue_cmd() accordingly for single command issuers.
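
The resulting structure, roughly (matching the description above):

    struct arm_smmu_cmdq_batch {
            u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
            struct arm_smmu_cmdq *cmdq;     /* decided at batch init */
            int num;
    };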

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Change-Id: I22baeeb40b0e1d58b63c85246c7033397be7811b
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Link: https://lore.kernel.org/r/2cbf5ddefb6ea611e48d67c642271bd24421eb21.1724970714.git.nicolinc@nvidia.com
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Ashish Mhetre <amhetre@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435428
Reviewed-by: Pritesh Raithatha <praithatha@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-20 10:22:52 +00:00
Sheetal
4821cdbb60 dmaengine: tegra-adma: Fix use-after-free
A use-after-free bug exists in the Tegra ADMA driver when audio streams
are terminated, particularly during XRUN conditions. The issue occurs
when the DMA buffer is freed by tegra_adma_terminate_all() before the
vchan completion tasklet finishes accessing it.

The race condition follows this sequence:

  1. DMA transfer completes, triggering an interrupt that schedules the
     completion tasklet (tasklet has not executed yet)
  2. Audio playback stops, calling tegra_adma_terminate_all() which
     frees the DMA buffer memory via kfree()
  3. The scheduled tasklet finally executes, calling vchan_complete()
     which attempts to access the already-freed memory

Since tasklets can execute at any time after being scheduled, there is
no guarantee that the buffer will remain valid when vchan_complete()
runs.

Fix this by properly synchronizing the virtual channel completion
(see the sketch below):
 - Call vchan_terminate_vdesc() in tegra_adma_stop() to mark the
   descriptor as terminated instead of freeing it.
 - Add a tegra_adma_synchronize() callback that calls
   vchan_synchronize(), which kills any pending tasklets and frees any
   terminated descriptors.
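
A sketch of the new callback, assuming the driver's existing helpers:

    static void tegra_adma_synchronize(struct dma_chan *dc)
    {
            struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);

            /* Waits out any scheduled vchan tasklet and frees the
             * descriptors terminated by terminate_all.
             */
            vchan_synchronize(&tdc->vc);
    }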

Crash logs:
[  337.427523] BUG: KASAN: use-after-free in vchan_complete+0x124/0x3b0
[  337.427544] Read of size 8 at addr ffff000132055428 by task swapper/0/0

[  337.427562] Call trace:
[  337.427564]  dump_backtrace+0x0/0x320
[  337.427571]  show_stack+0x20/0x30
[  337.427575]  dump_stack_lvl+0x68/0x84
[  337.427584]  print_address_description.constprop.0+0x74/0x2b8
[  337.427590]  kasan_report+0x1f4/0x210
[  337.427598]  __asan_load8+0xa0/0xd0
[  337.427603]  vchan_complete+0x124/0x3b0
[  337.427609]  tasklet_action_common.constprop.0+0x190/0x1d0
[  337.427617]  tasklet_action+0x30/0x40
[  337.427623]  __do_softirq+0x1a0/0x5c4
[  337.427628]  irq_exit+0x110/0x140
[  337.427633]  handle_domain_irq+0xa4/0xe0
[  337.427640]  gic_handle_irq+0x64/0x160
[  337.427644]  call_on_irq_stack+0x20/0x4c
[  337.427649]  do_interrupt_handler+0x7c/0x90
[  337.427654]  el1_interrupt+0x30/0x80
[  337.427659]  el1h_64_irq_handler+0x18/0x30
[  337.427663]  el1h_64_irq+0x7c/0x80
[  337.427667]  cpuidle_enter_state+0xe4/0x540
[  337.427674]  cpuidle_enter+0x54/0x80
[  337.427679]  do_idle+0x2e0/0x380
[  337.427685]  cpu_startup_entry+0x2c/0x70
[  337.427690]  rest_init+0x114/0x130
[  337.427695]  arch_call_rest_init+0x18/0x24
[  337.427702]  start_kernel+0x380/0x3b4
[  337.427706]  __primary_switched+0xc0/0xc8

Fixes: f46b195799 ("dmaengine: tegra-adma: Add support for Tegra210 ADMA")

Bug 5638368
Bug 4879633

Change-Id: I2ea7f38ef7041db2fac63d1e62ecdef04fb929de
Signed-off-by: sheetal <sheetal@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3482483
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: Sameer Pujar <spujar@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Mohan kumar <mkumard@nvidia.com>
2025-11-13 00:13:25 -08:00
Sumit Gupta
e47f83da16 soc/tegra: cbb: set ERD bit during resume for error interrupt
During resume, set the ERD bit to mask SErrors and generate an
interrupt to report errors.
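
A sketch of the resume hook (accessor names assumed):

    static int __maybe_unused tegra_cbb_resume(struct device *dev)
    {
            struct tegra_cbb *cbb = dev_get_drvdata(dev);

            tegra_cbb_error_enable(cbb);    /* sets the ERD bit */
            return 0;
    }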

Change-Id: I3e88b8ca4f1cfb59d90be07e321931b4ee9a4a01
Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-jammy/+/2948752
(cherry picked from commit c1498a4761d3a77fcae29b9ee1f17340957ebed6)
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-jammy/+/2950204
(cherry picked from commit 86755e5c0f8d2560094452c0c79aedb2b06ff95a)
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3485551
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-10 22:11:58 -08:00
Hiteshkumar Patel
a384bb60f7 NVIDIA: SAUCE: tegra-epl: update epl mailbox timeout
There is an intermittent issue where the pm-notify call fails if we
call it immediately after the handshake. The issue is very random, so
update the mailbox timeout to 13 milliseconds, which solves the
issue.

Bug 4774805

Change-Id: Ia43b418b904dcb23a526af1045fd12946165c285
Signed-off-by: Hiteshkumar Patel <hiteshkumarg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3483089
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
2025-11-06 00:57:38 -08:00
Hiteshkumar Patel
cc660d12b9 NVIDIA: SAUCE: mailbox: tegra-hsp: Do not clear interrupt routing at probe
HSP interrupts are shared across firmware and the OS. Currently, if a
firmware configures interrupt routing before the kernel boots, the
tegra-hsp driver can clear that routing if the same shared interrupt
is assigned to Linux. This disables the interrupts for that firmware.

Bug 4774805

Change-Id: Icd819a31398ab53f8e915b9b2ef1ffd234972eb3
Signed-off-by: Hiteshkumar Patel <hiteshkumarg@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3483088
Reviewed-by: Kartik Rajput <kkartik@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-06 00:57:33 -08:00
Wayne Chang
fc8056e062 NVIDIA: SAUCE: usb: host: tegra: Remove manual wake IRQ disposal
We found that calling irq_dispose_mapping() caused a kernel warning
when removing the driver. The IRQs are obtained using
platform_get_irq(), which returns a Linux virtual IRQ number directly
managed by the device core, not by the OF subsystem. Therefore, the
driver should not call irq_dispose_mapping() for these IRQs.

Bug 5421820

Change-Id: I44bb119def6d85b85fe06e1c0fc6771b63d35fd8
Signed-off-by: Wayne Chang <waynec@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3479600
Reviewed-by: Henry Lin <henryl@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: WK Tsai <wtsai@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-11-03 01:12:07 -08:00
Johnny Liu
9340df62f6 NVIDIA: SAUCE: clk: tegra: make nocache for clocks
Clock scaling for specific clocks, such as the EMC/SCF/MCF/SMMU
clocks, is managed by the BPMP itself without kernel involvement.

When such a clock is updated on the BPMP side and kernel consumer
drivers use clk_get_rate() to fetch the current clock rate, the CCF
framework returns the cached clock rate instead of the correct one
being scaled by the BPMP.

This change makes all clk_get_rate() requests "CLK_GET_RATE_NOCACHE"
to force the requests through to the BPMP and get the current clock
rate.
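
The flag is set at clock registration time, roughly (sketch; exact
call site in the tegra-bpmp clock driver assumed):

    struct clk_init_data init = {};

    init.flags |= CLK_GET_RATE_NOCACHE;  /* bypass the CCF rate cache */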

Bug 5621554

Change-Id: Ib31aa00810aef57cd50cd7aea0b54752a78462d6
Signed-off-by: Johnny Liu <johnliu@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3479075
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
2025-10-30 02:42:17 -07:00
Petlozu Pravareshwar
f26de42a86 soc/tegra: pmc: Add sysfs nodes to select boot chain
Add sysfs nodes to select A/B boot chain so that BootROM can select
the right boot path on warm boot.

Bug 4510385
Bug 5310687

Signed-off-by: Petlozu Pravareshwar <petlozup@nvidia.com>
Change-Id: Idb9534ea533cd88094b6835a28f3b7141e954c29
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3456564
(cherry picked from commit cbc5d03fb9fe9790586b3ace4ebb98078e17cce3)
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3468493
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Laxman Dewangan <ldewangan@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-10-22 21:42:39 -07:00
Wayne Chang
62eaf46abc NVIDIA: SAUCE: usb: xhci: tegra: Use platform_get_irq_optional() for wake IRQs
When some wake IRQs are disabled in the device tree, the corresponding
interrupt entries are removed from DT. In such cases, the driver
currently calls platform_get_irq(), which returns -ENXIO and logs
an error like:

  tegra-xusb a80aa10000.usb: error -ENXIO: IRQ index 9 not found

However, not all wake IRQs are mandatory; the hardware can operate
normally even if some wake sources are not defined in DT. To avoid
this false alarm and to tolerate missing wake IRQs gracefully, use
platform_get_irq_optional() instead of platform_get_irq().
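
i.e., roughly:

    /* Missing wake IRQs are tolerated; real errors still propagate */
    irq = platform_get_irq_optional(pdev, index);
    if (irq == -ENXIO)
            return 0;   /* wake source not described in DT; not fatal */
    if (irq < 0)
            return irq;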

Bug 5421820

Change-Id: Ib03b4edb933d481983ec8b9f752ab26004ba4784
Signed-off-by: Wayne Chang <waynec@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3470345
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-10-17 11:43:37 -07:00
Wayne Chang
bfe324d0df NVIDIA: SAUCE: xhci: Enable USB wakeup function for Tegra264
Enable USB wakeup function for Tegra264

Bug 5421820

Change-Id: Ic3dba730b5de5e59ade67cee9b3bfb3a857b10f6
Signed-off-by: Wayne Chang <waynec@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3469951
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-10-17 11:43:32 -07:00
Haotien Hsu
434b01fc8e usb: xhci: tegra: Support USB wakeup function for Tegra234
When the system is suspended, USB hot-plugging/unplugging can trigger
wake events of the Tegra USB host controller.
Enable support for USB wake-up events by parsing device-tree to see if
the interrupts for the wake-up events are present and if so configure
those interrupts. Note that if wake-up events are not present, still
allow the USB host controller to probe as normal.

Signed-off-by: Haotien Hsu <haotienh@nvidia.com>
Link: https://lore.kernel.org/r/20250811074558.1062048-5-haotienh@nvidia.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 5df186e2ef11dca3fb6f0f332dc09c4ac0bed870)

Bug 5421820

Change-Id: Ib2f90473625e4aff45b7f333925581ceed3a29c2
Signed-off-by: Wayne Chang <waynec@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3469950
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-10-17 11:43:27 -07:00
Wayne Chang
147bb0d836 Revert "NVIDIA: SAUCE: WAR: soc: tegra: pmc: Disable wakeup capability of USB3 port2"
This reverts commit 6957293414.

Reason for revert: The change conflicts with the cherry-picked upstream
updates. The wakeup capability is disabled in the device tree instead.

Bug 5421820

Change-Id: I39877389385d2e880c53d483272d8dc139183158
Signed-off-by: Wayne Chang <waynec@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3469949
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
2025-10-17 11:43:22 -07:00
Wayne Chang
72e7ef8a21 Revert "NVIDIA: SAUCE: xhci: tegra: Add remote wakeup support"
This reverts commit 0d675e2670.

Reason for revert: Replaced with a proper fix

Bug 5421820

Change-Id: I3bf60e959f639872d4dec5610bca6592ed7b7e02
Signed-off-by: Wayne Chang <waynec@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3469948
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
2025-10-17 11:43:16 -07:00
Wayne Chang
2ee17fb833 Revert "NVIDIA: SAUCE: xhci: tegra: Enable USB wake-up for Tegra234"
This reverts commit a190f81809.

Reason for revert: replaced with a proper fix

Bug 5421820

Change-Id: I4646e380a3dde9dc67a4f6efed8140fd2bdcde24
Signed-off-by: Wayne Chang <waynec@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3469947
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: svcacv <svcacv@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
2025-10-17 11:43:11 -07:00
Wayne Chang
8bfd2f594b Revert "NVIDIA: SAUCE: xhci: tegra: Fix irq_dispose_mapping() warning"
This reverts commit ef4c7b0620.

Reason for revert: replaced with a proper fix

Bug 5421820

Change-Id: I447e08bc8aedb0a5819aaf4feae05ded4082375c
Signed-off-by: Wayne Chang <waynec@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3469946
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: svcacv <svcacv@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-10-17 11:43:05 -07:00
Brad Griffis
3c68f26fcb NVIDIA: SAUCE: arm64: defconfig: build EFI test module
In order to run the tests from fwts you need this option:

CONFIG_EFI_TEST=m

Bug 5490446

Change-Id: I9b05236d2b1859fba02392745907f0db6749f51a
Signed-off-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3467905
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: mobile promotions <svcmobile_promotions@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: mobile promotions <svcmobile_promotions@nvidia.com>
2025-10-14 09:57:44 -07:00
Jon Hunter
29d6e78f9b NVIDIA: SAUCE: memory: Remove unused variable
The variable 'intstat' is not used, so remove it.

Bug 4655916

Change-Id: Iceceffef423e3fbec3f835c9879fd11778746956
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3465067
(cherry picked from commit 1a819646504fe5788e348025d350a67bf478514e)
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3469210
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-10-14 07:43:15 -07:00
Jon Hunter
e1c5eb7217 NVIDIA: SAUCE: cpufreq: tegra194: Remove unused variable
The variable 'maxcpus_per_clock' is no longer used, so remove it to
fix an unused-variable compiler warning.

Bug 5213905

Change-Id: I92aa24c6cece6f2a620982102c3fbce9ab18e6c7
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3465057
(cherry picked from commit 9ec1570750b14ac2590484906c9e66d1b125b57b)
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3469209
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
2025-10-14 07:43:10 -07:00
Jon Hunter
5337e233d5 NVIDIA: SAUCE: net: phy: realtek: Fix RTL8211F suspend
When integrating WoL support for the RTL8211F device, a duplicate
suspend entry was added for this device. The difference between the
rtl821x_suspend() and genphy_suspend() functions is that
rtl821x_suspend() will only call genphy_suspend() and disable clocks
if WoL is not enabled. Therefore, remove the duplicate suspend entry
that calls genphy_suspend() to ensure the PHY is not suspended if WoL
is enabled.

Bug 200562291

Change-Id: I9d5be01d46ee63caba9c6817b910283bf2fac69c
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3464990
(cherry picked from commit ca29769be9997f8196e0a2209e40a134c344807a)
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3469208
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
2025-10-14 07:43:06 -07:00
Santosh Reddy Galma
2b7ff9fb2c NVIDIA: SAUCE: arm64: enable DRM_SIMPLEDRM config
Enable CONFIG_DRM_SIMPLEDRM for the simpledrm driver, which is
required for display handoff from the bootloader to the kernel and for
showing kernel boot logs on the display, instead of CONFIG_FB_SIMPLE
used for simplefb.

Bug 5371131
Bug 5576705

Change-Id: I3af469b4044d5900bfa71eea51d94b21ebff3684
Signed-off-by: Santosh Reddy Galma <galmar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3414764
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Lennox Wu <lennoxw@nvidia.com>
2025-10-14 07:42:21 -07:00
Johnny Liu
67ad13e13f pwm: tegra: Fix types of input arguments
Given the structure of pwm_state

    struct pwm_state {
        u64 period;
        u64 duty_cycle;
        ...
    };

Correct the types of input arguments of tegra_pwm_config to avoid loss
of precision or quantization error.
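
The corrected prototype then carries u64 end to end (sketch based on
the description):

    static int tegra_pwm_config(struct pwm_chip *chip,
                                struct pwm_device *pwm,
                                u64 duty_ns, u64 period_ns);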

Bug 5308986

Signed-off-by: Johnny Liu <johnliu@nvidia.com>
Signed-off-by: Robert Lin <robelin@nvidia.com>
Change-Id: I3362bda20b9a96476176fbfede5f87d7a125ad7e
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3455750
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Bibek Basu <bbasu@nvidia.com>
2025-10-14 00:58:51 -07:00
Jon Hunter
392f036ad4 NVIDIA: SAUCE: soc/tegra: pmc: Initialise mutex
In Linux v6.15, commit 337369f8ce9e ("locking/mutex: Add MUTEX_WARN_ON()
into fast path") added a WARN to detect incorrectly initialised mutexes
and since this commit the following warning has been observed for
Tegra264 platforms ...

 DEBUG_LOCKS_WARN_ON(lock->magic != lock)
 WARNING: CPU: 1 PID: 168 at kernel/locking/mutex.c:155
  mutex_lock+0xd8/0xe8
 ...
 Call trace:
  mutex_lock+0xd8/0xe8 (P)
  tegra_io_pad_pinconf_set+0xf4/0x308
  pinconf_apply_setting+0xa8/0x140
  pinctrl_commit_state+0x104/0x2b0
  pinctrl_select_state+0x24/0x50
  tegra_sdhci_set_padctrl+0x98/0x258
  sdhci_tegra_start_signal_voltage_switch+0x84/0x170
  mmc_set_initial_signal_voltage+0x40/0x1a0
  mmc_power_up.part.0+0x60/0x190
  mmc_start_host+0xb8/0xd0
  mmc_add_host+0x8c/0x110
  __sdhci_add_host+0x23c/0x390
  sdhci_add_host+0x4c/0x68
  sdhci_tegra_probe+0x800/0xba8
  platform_probe+0x70/0x118
  really_probe+0xd4/0x3e0
  __driver_probe_device+0x90/0x1a8
  driver_probe_device+0x44/0x170
  __device_attach_driver+0xf0/0x1e8
  bus_for_each_drv+0x90/0x110
  __device_attach_async_helper+0xc4/0x128
  async_run_entry_fn+0x40/0x190
  process_one_work+0x180/0x448
  worker_thread+0x274/0x390
  kthread+0x158/0x260
  ret_from_fork+0x10/0x20

Reviewing the Tegra264 PMC driver code shows that the mutex for the
powergates is never initialised, so fix this by ensuring the mutex is
initialised correctly during probe.
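
The fix amounts to one call in the probe path (lock name assumed from
the backtrace):

    /* Make the powergate mutex valid before any user can take it */
    mutex_init(&pmc->powergates_lock);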

Bug 4470933
Bug 5344712

Change-Id: Icf4b1a03af342334918ed707faaf2565d215e94e
Signed-off-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3465120
(cherry picked from commit 1a9032659680530998cc353699c0ca64b176b4d2)
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3468705
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-10-13 12:57:00 -07:00
Eric Funsten
14898fb272 NVIDIA: SAUCE: perf: arm_cspmu: NVIDIA T264 PMU leakage workaround
The NVIDIA Tegra T264 SOC has a HW issue where events captured on a
prior experiment can corrupt the current experiment. This adds a
workaround which involves the following steps:
1. First experiment ends; disable PMCR.E as we normally do
2. Clear PMCNTEN for all counters
3. Enable PMCR.E
4. Disable PMCR.E
5. Re-enable PMCNTEN for the counters cleared in step 2
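
A sketch of this sequence (register/field names assumed; not the
verbatim patch):

    static void t264_pmu_flush_stale_events(struct arm_cspmu *cspmu)
    {
            u32 set = readl(cspmu->base0 + PMCNTENSET);

            writel(set, cspmu->base0 + PMCNTENCLR);     /* step 2 */
            writel(PMCR_E, cspmu->base0 + PMCR);        /* step 3 */
            writel(0, cspmu->base0 + PMCR);             /* step 4 */
            writel(set, cspmu->base0 + PMCNTENSET);     /* step 5 */
    }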

Bug 5524939

Change-Id: Ie5885b9bb9495aa0cfb1844a88cbdc7e0509ce67
Signed-off-by: Eric Funsten <efunsten@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3459618
Reviewed-by: Besar Wicaksono <bwicaksono@nvidia.com>
Tested-by: Ryan Bissell <rbissell@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
2025-09-30 04:15:35 -07:00
Besar Wicaksono
bb1aeb903a NVIDIA: SAUCE: perf: arm_cspmu: nvidia: add T264 support
Adds PMU support for the following IPs in NVIDIA Tegra T264 SOC:
- Unified Coherency Fabric (UCF)
- Vision
- Display
- High-speed IO
- UCF GPU

Bug 5524939

Change-Id: I595dc746e3b45b9f40c5f4343212c37f42f0faa1
Signed-off-by: Besar Wicaksono <bwicaksono@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3459617
Tested-by: Ryan Bissell <rbissell@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
2025-09-30 04:15:30 -07:00
Brad Griffis
b837fb9d0d NVIDIA: SAUCE: arm64: defconfig: more Calico modules
Configure the following in order to get Calico operational:

CONFIG_NET_IPIP=m
CONFIG_IP_SET_MAX=512
CONFIG_IP_SET_BITMAP_IP=m
CONFIG_IP_SET_BITMAP_IPMAC=m
CONFIG_IP_SET_BITMAP_PORT=m
CONFIG_IP_SET_HASH_IP=m
CONFIG_IP_SET_HASH_IPMARK=m
CONFIG_IP_SET_HASH_IPPORT=m
CONFIG_IP_SET_HASH_IPPORTIP=m
CONFIG_IP_SET_HASH_IPPORTNET=m
CONFIG_IP_SET_HASH_IPMAC=m
CONFIG_IP_SET_HASH_MAC=m
CONFIG_IP_SET_HASH_NETPORTNET=m
CONFIG_IP_SET_HASH_NET=m
CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m

Bug 5438065

Change-Id: I35d5510f9a572baf66944ad3489d1e050499d25e
Signed-off-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3449304
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Tested-by: Yongchang Liu <yongchangl@nvidia.com>
Reviewed-by: Yongchang Liu <yongchangl@nvidia.com>
2025-09-26 21:28:53 -07:00
Rahul Bedarkar
f29136b334 NVIDIA: SAUCE: tegra-epl: add plausibility checks and improve error handling
Add comprehensive plausibility checks and validation throughout the EPL driver:

- Add timestamp validation with configurable resolution for TEGRA234/TEGRA264
- Implement timestamp overflow handling and range validation (90ms limit)
- Add input parameter validation in device file ioctl operations
- Improve error handling in HSP mailbox communication with proper channel validation
- Add register mapping validation before write operations
- Enhance power management state handling with proper error checking
- Replace generic print statements with device-specific debug messages
- Add proper error handling in probe, suspend, resume, and shutdown functions
- Improve handshake retry logic with better error reporting
- Add validation for state parameters in PM notification functions

The changes improve driver robustness by adding proper validation
for all critical operations and providing better error reporting
for debugging purposes.
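
As an illustration of the 90ms timestamp range validation mentioned
above, a minimal sketch with hypothetical names, not the actual
tegra-epl code:

#include <linux/types.h>
#include <linux/time64.h>

#define EPL_TS_MAX_AGE_NS	(90ULL * NSEC_PER_MSEC)	/* 90 ms window */

static bool epl_timestamp_in_range(u64 now_ns, u64 ts_ns)
{
	/* Reject future timestamps (counter overflow or corruption) */
	if (ts_ns > now_ns)
		return false;

	/* Reject events older than the 90 ms plausibility window */
	return (now_ns - ts_ns) <= EPL_TS_MAX_AGE_NS;
}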

Bug 5320023

Change-Id: I8095416921ec8d229e2cdab47d1b3c3e50fa1bbf
Signed-off-by: Rahul Bedarkar <rabedarkar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3437098
(cherry picked from commit a7415c43437d37f2732d334cfe90ef2f4c7d7575)
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3459572
Tested-by: Hiteshkumar Patel <hiteshkumarg@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Dipen Patel <dipenp@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
2025-09-26 19:27:52 -07:00
Shubham Jain
a385828614 NVIDIA: SAUCE: tegra-epl: allow tegra-epl to be built as module
- Update the Kconfig file to allow tegra-epl to
  be built as a module.
- Update the MISC EC SW generic error index offset
  depending on Orin or Thor chip.

Bug 5142445
Bug 5119438
Bug 5405209
Bug 5415787

Change-Id: Iea589710e1a90856550623543f9ac342854c2a2c
Signed-off-by: Shubham Jain <shubhamj@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3430974
(cherry picked from commit 2a56160a9c270b5b411a88f9e79865e5442581d3)
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3459571
Tested-by: Hiteshkumar Patel <hiteshkumarg@nvidia.com>
Reviewed-by: Dipen Patel <dipenp@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-09-26 19:27:47 -07:00
Rahul Bedarkar
d684657795 NVIDIA: SAUCE: tegra-epl: Map mission status reg only if required
The mission status register is only required if either of the MISC EC
registers is mapped.
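
A sketch of the conditional mapping, using hypothetical structure and
field names rather than the actual driver's:

#include <linux/device.h>
#include <linux/errno.h>
#include <linux/io.h>
#include <linux/ioport.h>

struct epl_ctx {
	void __iomem *misc_ec[2];	/* hypothetical MISC EC mappings */
	void __iomem *mission_status;
};

static int epl_map_mission_status(struct device *dev, struct epl_ctx *epl,
				  struct resource *res)
{
	/* Skip the mapping when neither MISC EC register is mapped */
	if (!epl->misc_ec[0] && !epl->misc_ec[1])
		return 0;

	epl->mission_status = devm_ioremap(dev, res->start,
					   resource_size(res));
	return epl->mission_status ? 0 : -ENOMEM;
}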

Bug 5100266
Bug 5415787
Bug 5405209

Change-Id: Idf9c64050d3d106ac1b78b7c1e0d64257f9195d3
Signed-off-by: Rahul Bedarkar <rabedarkar@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3430973
(cherry picked from commit c28c7e50c460912f60a1b4e265d37a88fc522136)
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3459570
Tested-by: Hiteshkumar Patel <hiteshkumarg@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Dipen Patel <dipenp@nvidia.com>
2025-09-26 19:27:42 -07:00
Petlozu Pravareshwar
072b8487aa NVIDIA: SAUCE: soc/tegra: pmc: Remove reset status sysfs nodes
Reset-status-related sysfs nodes are no longer supported on T264 for
security reasons. This change deletes these sysfs nodes accordingly.

Bug 5245235

Signed-off-by: Petlozu Pravareshwar <petlozup@nvidia.com>
Change-Id: Ia96872c083d23ca7f3bfc774ca9f614bcb3c63bd
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3419584
(cherry picked from commit 25870226d1aef86ea221d10f6376cc694122d196)
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3446575
Tested-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: svcacv <svcacv@nvidia.com>
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
2025-09-04 12:56:56 -07:00
Kartik Rajput
094a99a56d NVIDIA: SAUCE: serial: amba-pl011: Do not use IBRD
Tegra UART controllers do not support the FBRD register, which makes
various standard baud rates that the HW supports unusable.

Use clk_set_rate() to program the UART clock rate instead, as sketched
below.
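
The idea, sketched with illustrative names (the actual change lives in
the amba-pl011 driver):

#include <linux/clk.h>

/*
 * With FBRD unusable, keep the integer divider fixed and ask the clock
 * framework for a rate that hits the target baud exactly:
 * UARTCLK = 16 * baud when the effective divisor is 1.
 */
static int tegra_pl011_set_baud(struct clk *uartclk, unsigned int baud)
{
	return clk_set_rate(uartclk, 16UL * baud);
}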

Bug 5406304

Change-Id: I6fcf14b0186e54a6f418287791d80e12d17600a0
Signed-off-by: Kartik Rajput <kkartik@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3441884
Reviewed-by: Laxman Dewangan <ldewangan@nvidia.com>
(cherry picked from commit daa72589899560fb3570b165163bd8e35cf704e1)
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3445145
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
2025-09-04 01:56:57 -07:00
Oleg Nesterov
6768ecea00 posix-cpu-timers: fix race between handle_posix_cpu_timers() and posix_cpu_timer_del()
If an exiting non-autoreaping task has already passed exit_notify() and
calls handle_posix_cpu_timers() from IRQ, it can be reaped by its parent
or debugger right after unlock_task_sighand().

If a concurrent posix_cpu_timer_del() runs at that moment, it won't be
able to detect timer->it.cpu.firing != 0: cpu_timer_task_rcu() and/or
lock_task_sighand() will fail.

Add the tsk->exit_state check into run_posix_cpu_timers() to fix this.

This fix is not needed if CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y, because
exit_task_work() is called before exit_notify(). But the check still
makes sense, task_work_add(&tsk->posix_cputimers_work.work) will fail
anyway in this case.
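
The shape of the fix as described (paraphrased, not the verbatim
upstream diff):

#include <linux/sched.h>
#include <linux/lockdep.h>

void run_posix_cpu_timers(void)
{
	struct task_struct *tsk = current;

	lockdep_assert_irqs_disabled();

	/*
	 * A task past exit_notify() can be reaped right after
	 * unlock_task_sighand(), so bail out here; otherwise a
	 * concurrent posix_cpu_timer_del() could miss a firing
	 * timer on the reaped task.
	 */
	if (tsk->exit_state)
		return;

	/* ... existing expiry handling ... */
}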

Cc: stable@vger.kernel.org
Reported-by: Benoît Sevens <bsevens@google.com>
Fixes: 0bdd2ed413 ("sched: run_posix_cpu_timers: Don't check ->exit_state, use lock_task_sighand()")
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(cherry picked from commit f90fff1e152dedf52b932240ebbd670d83330eca)

Bug 5341153

CVE-2025-38352
Change-Id: I2146869cc8f9684d4e4d56eaa247f54ed0225e1e
Signed-off-by: Brad Griffis <bgriffis@nvidia.com>
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3435903
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Paritosh Dixit <paritoshd@nvidia.com>
2025-08-20 09:12:13 -07:00
26 changed files with 2118 additions and 306 deletions

View File

@@ -10,6 +10,11 @@ metrics like memory bandwidth, latency, and utilization:
* NVLink-C2C1
* CNVLink
* PCIE
* Unified Coherency Fabric (UCF)
* Vision
* Display
* High-speed IO
* UCF-GPU
PMU Driver
----------
@@ -183,6 +188,159 @@ Example usage:
perf stat -a -e nvidia_pcie_pmu_1/event=0x0,root_port=0x3/
UCF PMU
-------
The UCF PMU monitors system level cache events and DRAM traffic that flows
through UCF.
The events and configuration options of this PMU device are described in sysfs,
see /sys/bus/event_sources/devices/nvidia_ucf_pmu_<socket-id>.
User can configure the PMU to capture events from specific source and destination.
The source/destination filter is described in
/sys/bus/event_sources/devices/nvidia_ucf_pmu_<socket-id>/format/. By default
traffic from all sources and destinations will be captured if no source/destination
is specified.
Example usage:
* Count event id 0x0 from any source/destination of socket 0::
perf stat -a -e nvidia_ucf_pmu_0/event=0x0/
* Count event id 0x1 from socket 0's CPUs to socket 0's DRAM::
perf stat -a -e nvidia_ucf_pmu_0/event=0x1,src_loc_cpu=0x1,dst_loc=0x1/
* Count event id 0x1 from remote source of socket 0 to local and remote DRAM::
perf stat -a -e nvidia_ucf_pmu_0/event=0x1,src_rem=0x1,dst_loc=0x1,dst_rem=0x1/
* Count event id 0x2 from any source/destination of socket 1::
perf stat -a -e nvidia_ucf_pmu_1/event=0x2/
* Count event id 0x3 from socket 1's CPUs to socket 1's DRAM::
perf stat -a -e nvidia_ucf_pmu_1/event=0x3,src_loc_cpu=0x1,dst_loc=0x1/
Vision PMU
------------
The vision PMU monitors memory traffic from the multimedia IPs in the SOC.
The events and configuration options of this PMU device are described in sysfs,
see /sys/bus/event_sources/devices/nvidia_vision_pmu_<socket-id>.
User can configure the PMU to capture events from specific IPs.
/sys/bus/event_sources/devices/nvidia_vision_pmu_<socket-id>/format/ contains
the filter attribute name of each multimedia IP. This filter attribute is a
bitmask to select the AXI/hub interface of the IP to monitor. By default traffic
from all interfaces of all IPs will be captured if no IPs are specified.
Example usage:
* Count event id 0x0 from all multimedia IPs in socket 0::
perf stat -a -e nvidia_vision_pmu_0/event=0x0/
* Count event id 0x1 from AXI/hub interface 0 in VI-0 of socket 0::
perf stat -a -e nvidia_vision_pmu_0/event=0x1,vi_0=0x1/
* Count event id 0x1 from AXI/hub interface 0 and 1 in VI-0 of socket 0::
perf stat -a -e nvidia_vision_pmu_0/event=0x1,vi_0=0x3/
* Count event id 0x2 from all multimedia IPs in socket 1::
perf stat -a -e nvidia_vision_pmu_1/event=0x2/
* Count event id 0x3 from AXI/hub interface 0 in VI-0 and PVA of socket 1::
perf stat -a -e nvidia_vision_pmu_1/event=0x3,vi_0=0x1,pva=0x1/
Display PMU
------------
The display PMU monitors memory traffic from the display IP in the SOC.
The events and configuration options of this PMU device are described in sysfs,
see /sys/bus/event_sources/devices/nvidia_display_pmu_<socket-id>.
Example usage:
* Count event id 0x0 in socket 0::
perf stat -a -e nvidia_display_pmu_0/event=0x0/
* Count event id 0x0 in socket 1::
perf stat -a -e nvidia_display_pmu_1/event=0x0/
High-speed I/O PMU
-------------------
The high-speed I/O PMU monitors memory traffic from the high speed I/O devices
in the SOC.
The events and configuration options of this PMU device are described in sysfs,
see /sys/bus/event_sources/devices/nvidia_uphy_pmu_<socket-id>.
User can configure the PMU to capture events from specific I/Os.
/sys/bus/event_sources/devices/nvidia_uphy_pmu_<socket-id>/format/ contains
the filter attribute name of each I/O. This filter attribute is a
bitmask to select the AXI/hub interface of the I/O to monitor. By default
traffic from all interfaces of all I/Os will be captured if no I/Os are
specified.
Example usage:
* Count event id 0x0 from all I/Os in socket 0::
perf stat -a -e nvidia_uphy_pmu_0/event=0x0/
* Count event id 0x1 from PCIE Root Port 1 of socket 0::
perf stat -a -e nvidia_uphy_pmu_0/event=0x1,pcie_rp_1=0x1/
* Count event id 0x1 from PCIE Root Port 1 and Root Port 2 of socket 0::
perf stat -a -e nvidia_uphy_pmu_0/event=0x1,pcie_rp_1=0x1,pcie_rp_2=0x1/
* Count event id 0x2 from all IPs in socket 1::
perf stat -a -e nvidia_uphy_pmu_1/event=0x2/
* Count event id 0x3 from PCIE Root Port 3 and UFS of socket 1::
perf stat -a -e nvidia_uphy_pmu_1/event=0x3,pcie_rp_3=0x1,ufs=0x1/
UCF-GPU PMU
------------
The UCF-GPU PMU monitors integrated GPU physical address traffic flowing through
UCF.
The events and configuration options of this PMU device are described in sysfs,
see /sys/bus/event_sources/devices/nvidia_ucf_gpu_pmu_<socket-id>.
Example usage:
* Count event id 0x0 in socket 0::
perf stat -a -e nvidia_ucf_gpu_pmu_0/event=0x0/
* Count event id 0x0 in socket 1::
perf stat -a -e nvidia_ucf_gpu_pmu_1/event=0x0/
.. _NVIDIA_Uncore_PMU_Traffic_Coverage_Section:
Traffic Coverage

View File

@@ -21625,6 +21625,7 @@ M: Thierry Reding <thierry.reding@gmail.com>
R: Krishna Reddy <vdumpa@nvidia.com>
L: linux-tegra@vger.kernel.org
S: Supported
F: drivers/iommu/arm/arm-smmu-v3/tegra241-cmdqv.c
F: drivers/iommu/arm/arm-smmu/arm-smmu-nvidia.c
F: drivers/iommu/tegra*

View File

@@ -151,6 +151,7 @@ CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_IP_PNP_BOOTP=y
CONFIG_NET_IPIP=m
CONFIG_SYN_COOKIES=y
CONFIG_IPV6=m
CONFIG_NETFILTER=y
@@ -181,6 +182,23 @@ CONFIG_NETFILTER_XT_MATCH_RECENT=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
CONFIG_NETFILTER_XT_MATCH_U32=m
CONFIG_IP_SET=m
CONFIG_IP_SET_MAX=512
CONFIG_IP_SET_BITMAP_IP=m
CONFIG_IP_SET_BITMAP_IPMAC=m
CONFIG_IP_SET_BITMAP_PORT=m
CONFIG_IP_SET_HASH_IP=m
CONFIG_IP_SET_HASH_IPMARK=m
CONFIG_IP_SET_HASH_IPPORT=m
CONFIG_IP_SET_HASH_IPPORTIP=m
CONFIG_IP_SET_HASH_IPPORTNET=m
CONFIG_IP_SET_HASH_IPMAC=m
CONFIG_IP_SET_HASH_MAC=m
CONFIG_IP_SET_HASH_NETPORTNET=m
CONFIG_IP_SET_HASH_NET=m
CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
CONFIG_IP_VS=m
CONFIG_NF_TABLES_ARP=y
CONFIG_IP_NF_IPTABLES=m
@@ -304,6 +322,7 @@ CONFIG_MTK_ADSP_IPC=m
CONFIG_ARM_FFA_TRANSPORT=y
# CONFIG_EFI_VARS_PSTORE is not set
CONFIG_EFI_CAPSULE_LOADER=y
CONFIG_EFI_TEST=m
CONFIG_IMX_SCU=y
CONFIG_QCOM_QSEECOM=y
CONFIG_QCOM_QSEECOM_UEFISECAPP=y
@@ -915,7 +934,7 @@ CONFIG_VIDEO_HANTRO=m
CONFIG_VIDEO_IMX412=m
CONFIG_VIDEO_OV5640=m
CONFIG_VIDEO_OV5645=m
CONFIG_DRM=m
CONFIG_DRM=y
CONFIG_DRM_I2C_NXP_TDA998X=m
CONFIG_DRM_HDLCD=m
CONFIG_DRM_MALI_DISPLAY=m
@@ -980,6 +999,7 @@ CONFIG_DRM_MEDIATEK_HDMI=m
CONFIG_DRM_MXSFB=m
CONFIG_DRM_IMX_LCDIF=m
CONFIG_DRM_MESON=m
CONFIG_DRM_SIMPLEDRM=y
CONFIG_DRM_PL111=m
CONFIG_DRM_LIMA=m
CONFIG_DRM_PANFROST=m
@@ -987,7 +1007,6 @@ CONFIG_DRM_TIDSS=m
CONFIG_DRM_POWERVR=m
CONFIG_FB=y
CONFIG_FB_EFI=y
CONFIG_FB_SIMPLE=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_BACKLIGHT_CLASS_DEVICE=y
CONFIG_BACKLIGHT_PWM=m
@@ -1406,6 +1425,7 @@ CONFIG_TEGRA_IOMMU_SMMU=y
CONFIG_ARM_SMMU=y
CONFIG_ARM_SMMU_V3=y
CONFIG_ARM_SMMU_V3_SVA=y
CONFIG_TEGRA241_CMDQV=y
CONFIG_MTK_IOMMU=y
CONFIG_QCOM_IOMMU=y
CONFIG_REMOTEPROC=y

View File

@@ -483,6 +483,7 @@ CONFIG_PLATFORM_MHU=y
CONFIG_TEGRA_IOMMU_SMMU=y
CONFIG_ARM_SMMU=y
CONFIG_ARM_SMMU_V3=y
CONFIG_TEGRA241_CMDQV=y
CONFIG_MTK_IOMMU=y
CONFIG_REMOTEPROC=y
CONFIG_MTK_PMIC_WRAP=y

View File

@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (C) 2016-2022 NVIDIA Corporation
* Copyright (C) 2016-2025 NVIDIA Corporation
*/
#include <linux/clk-provider.h>
@@ -584,6 +584,7 @@ tegra_bpmp_clk_register(struct tegra_bpmp *bpmp,
}
init.parent_names = parents;
init.flags = CLK_GET_RATE_NOCACHE;
err = devm_clk_hw_register(bpmp->dev, &clk->hw);

View File

@@ -615,8 +615,6 @@ static int tegra_cpufreq_init_cpufreq_table(struct cpufreq_policy *policy,
static int tegra194_cpufreq_init(struct cpufreq_policy *policy)
{
struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
int maxcpus_per_clock = data->soc->maxcpus_per_cluster *
data->soc->clusters_per_clk;
u32 clusterid = data->cpu_data[policy->cpu].clusterid;
struct cpufreq_frequency_table *freq_table;
struct cpufreq_frequency_table *bpmp_lut;

View File

@@ -420,10 +420,17 @@ static void tegra_adma_stop(struct tegra_adma_chan *tdc)
return;
}
kfree(tdc->desc);
vchan_terminate_vdesc(&tdc->desc->vd);
tdc->desc = NULL;
}
static void tegra_adma_synchronize(struct dma_chan *dc)
{
struct tegra_adma_chan *tdc = to_tegra_adma_chan(dc);
vchan_synchronize(&tdc->vc);
}
static void tegra_adma_start(struct tegra_adma_chan *tdc)
{
struct virt_dma_desc *vd = vchan_next_desc(&tdc->vc);
@@ -1157,6 +1164,7 @@ static int tegra_adma_probe(struct platform_device *pdev)
tdma->dma_dev.device_config = tegra_adma_slave_config;
tdma->dma_dev.device_tx_status = tegra_adma_tx_status;
tdma->dma_dev.device_terminate_all = tegra_adma_terminate_all;
tdma->dma_dev.device_synchronize = tegra_adma_synchronize;
tdma->dma_dev.src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
tdma->dma_dev.dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
tdma->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);

View File

@@ -406,6 +406,18 @@ config ARM_SMMU_V3_SVA
Say Y here if your system supports SVA extensions such as PCIe PASID
and PRI.
config TEGRA241_CMDQV
bool "NVIDIA Tegra241 CMDQ-V extension support for ARM SMMUv3"
depends on ARM_SMMU_V3
help
Support for NVIDIA CMDQ-Virtualization extension for ARM SMMUv3. The
CMDQ-V extension is similar to v3.3 ECMDQ for multi command queues
support, except with virtualization capabilities.
Say Y here if your system is NVIDIA Tegra241 (Grace) or it has the same
CMDQ-V extension.
config S390_IOMMU
def_bool y if S390 && PCI
depends on S390 && PCI

View File

@@ -2,4 +2,5 @@
obj-$(CONFIG_ARM_SMMU_V3) += arm_smmu_v3.o
arm_smmu_v3-objs-y += arm-smmu-v3.o
arm_smmu_v3-objs-$(CONFIG_ARM_SMMU_V3_SVA) += arm-smmu-v3-sva.o
arm_smmu_v3-objs-$(CONFIG_TEGRA241_CMDQV) += tegra241-cmdqv.o
arm_smmu_v3-objs := $(arm_smmu_v3-objs-y)

View File

@@ -2,7 +2,8 @@
/*
* IOMMU API for ARM architected SMMUv3 implementations.
*
* Copyright (C) 2015 ARM Limited
* SPDX-FileCopyrightText: Copyright (C) 2015 ARM Limited
* SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. All rights reserved.
*
* Author: Will Deacon <will.deacon@arm.com>
*
@@ -23,6 +24,7 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/of_reserved_mem.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/platform_device.h>
@@ -345,14 +347,30 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
return 0;
}
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_ent *ent)
{
return &smmu->cmdq;
struct arm_smmu_cmdq *cmdq = NULL;
if (smmu->impl_ops && smmu->impl_ops->get_secondary_cmdq)
cmdq = smmu->impl_ops->get_secondary_cmdq(smmu, ent);
return cmdq ?: &smmu->cmdq;
}
static bool arm_smmu_cmdq_needs_busy_polling(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq)
{
if (cmdq == &smmu->cmdq)
return false;
return smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV;
}
static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
struct arm_smmu_queue *q, u32 prod)
struct arm_smmu_cmdq *cmdq, u32 prod)
{
struct arm_smmu_queue *q = &cmdq->q;
struct arm_smmu_cmdq_ent ent = {
.opcode = CMDQ_OP_CMD_SYNC,
};
@@ -367,10 +385,12 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu,
}
arm_smmu_cmdq_build_cmd(cmd, &ent);
if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
}
static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
struct arm_smmu_queue *q)
void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq)
{
static const char * const cerror_str[] = {
[CMDQ_ERR_CERROR_NONE_IDX] = "No error",
@@ -378,6 +398,7 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
[CMDQ_ERR_CERROR_ABT_IDX] = "Abort on command fetch",
[CMDQ_ERR_CERROR_ATC_INV_IDX] = "ATC invalidate timeout",
};
struct arm_smmu_queue *q = &cmdq->q;
int i;
u64 cmd[CMDQ_ENT_DWORDS];
@@ -420,13 +441,15 @@ static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
/* Convert the erroneous command into a CMD_SYNC */
arm_smmu_cmdq_build_cmd(cmd, &cmd_sync);
if (arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
u64p_replace_bits(cmd, CMDQ_SYNC_0_CS_NONE, CMDQ_SYNC_0_CS);
queue_write(Q_ENT(q, cons), cmd, q->ent_dwords);
}
static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
{
__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq);
}
/*
@@ -591,11 +614,11 @@ static void arm_smmu_cmdq_poll_valid_map(struct arm_smmu_cmdq *cmdq,
/* Wait for the command queue to become non-full */
static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
unsigned long flags;
struct arm_smmu_queue_poll qp;
struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
int ret = 0;
/*
@@ -626,11 +649,11 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
* Must be called with the cmdq lock held in some capacity.
*/
static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
int ret = 0;
struct arm_smmu_queue_poll qp;
struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
queue_poll_init(smmu, &qp);
@@ -650,10 +673,10 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu,
* Must be called with the cmdq lock held in some capacity.
*/
static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
struct arm_smmu_queue_poll qp;
struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
u32 prod = llq->prod;
int ret = 0;
@@ -700,12 +723,14 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu,
}
static int arm_smmu_cmdq_poll_until_sync(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
struct arm_smmu_ll_queue *llq)
{
if (smmu->options & ARM_SMMU_OPT_MSIPOLL)
return __arm_smmu_cmdq_poll_until_msi(smmu, llq);
if (smmu->options & ARM_SMMU_OPT_MSIPOLL &&
!arm_smmu_cmdq_needs_busy_polling(smmu, cmdq))
return __arm_smmu_cmdq_poll_until_msi(smmu, cmdq, llq);
return __arm_smmu_cmdq_poll_until_consumed(smmu, llq);
return __arm_smmu_cmdq_poll_until_consumed(smmu, cmdq, llq);
}
static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
@@ -742,13 +767,13 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
* CPU will appear before any of the commands from the other CPU.
*/
static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq,
u64 *cmds, int n, bool sync)
{
u64 cmd_sync[CMDQ_ENT_DWORDS];
u32 prod;
unsigned long flags;
bool owner;
struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu);
struct arm_smmu_ll_queue llq, head;
int ret = 0;
@@ -762,7 +787,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
while (!queue_has_space(&llq, n + sync)) {
local_irq_restore(flags);
if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
if (arm_smmu_cmdq_poll_until_not_full(smmu, cmdq, &llq))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
local_irq_save(flags);
}
@@ -788,7 +813,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
if (sync) {
prod = queue_inc_prod_n(&llq, n);
arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, cmdq, prod);
queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
/*
@@ -838,7 +863,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
if (sync) {
llq.prod = queue_inc_prod_n(&llq, n);
ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
ret = arm_smmu_cmdq_poll_until_sync(smmu, cmdq, &llq);
if (ret) {
dev_err_ratelimited(smmu->dev,
"CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
@@ -873,7 +898,8 @@ static int __arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
return -EINVAL;
}
return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, sync);
return arm_smmu_cmdq_issue_cmdlist(
smmu, arm_smmu_get_cmdq(smmu, ent), cmd, 1, sync);
}
static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
@@ -888,21 +914,33 @@ static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu,
return __arm_smmu_cmdq_issue_cmd(smmu, ent, true);
}
static void arm_smmu_cmdq_batch_init(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
struct arm_smmu_cmdq_ent *ent)
{
cmds->num = 0;
cmds->cmdq = arm_smmu_get_cmdq(smmu, ent);
}
static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds,
struct arm_smmu_cmdq_ent *cmd)
{
bool unsupported_cmd = !arm_smmu_cmdq_supports_cmd(cmds->cmdq, cmd);
bool force_sync = (cmds->num == CMDQ_BATCH_ENTRIES - 1) &&
(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC);
int index;
if (cmds->num == CMDQ_BATCH_ENTRIES - 1 &&
(smmu->options & ARM_SMMU_OPT_CMDQ_FORCE_SYNC)) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
cmds->num = 0;
if (force_sync || unsupported_cmd) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
cmds->num, true);
arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
}
if (cmds->num == CMDQ_BATCH_ENTRIES) {
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
cmds->num = 0;
arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
cmds->num, false);
arm_smmu_cmdq_batch_init(smmu, cmds, cmd);
}
index = cmds->num * CMDQ_ENT_DWORDS;
@@ -918,7 +956,9 @@ static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_batch *cmds)
{
return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmdq, cmds->cmds,
cmds->num, true);
}
static int arm_smmu_page_response(struct device *dev,
@@ -985,7 +1025,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_master *master,
},
};
cmds.num = 0;
arm_smmu_cmdq_batch_init(smmu, &cmds, &cmd);
for (i = 0; i < master->num_streams; i++) {
cmd.cfgi.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
@@ -1786,7 +1826,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
cmds.num = 0;
arm_smmu_cmdq_batch_init(master->smmu, &cmds, &cmd);
for (i = 0; i < master->num_streams; i++) {
cmd.atc.sid = master->streams[i].id;
arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
@@ -1800,7 +1840,9 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
{
int i;
unsigned long flags;
struct arm_smmu_cmdq_ent cmd;
struct arm_smmu_cmdq_ent cmd = {
.opcode = CMDQ_OP_ATC_INV,
};
struct arm_smmu_master *master;
struct arm_smmu_cmdq_batch cmds;
@@ -1826,7 +1868,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
cmds.num = 0;
arm_smmu_cmdq_batch_init(smmu_domain->smmu, &cmds, &cmd);
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_for_each_entry(master, &smmu_domain->devices, domain_head) {
@@ -1903,7 +1945,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
num_pages++;
}
cmds.num = 0;
arm_smmu_cmdq_batch_init(smmu, &cmds, cmd);
while (iova < end) {
if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
@@ -2872,12 +2914,10 @@ static struct iommu_ops arm_smmu_ops = {
};
/* Probing and initialisation functions */
static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
struct arm_smmu_queue *q,
void __iomem *page,
unsigned long prod_off,
unsigned long cons_off,
size_t dwords, const char *name)
int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
struct arm_smmu_queue *q, void __iomem *page,
unsigned long prod_off, unsigned long cons_off,
size_t dwords, const char *name)
{
size_t qsz;
@@ -2915,9 +2955,9 @@ static int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
return 0;
}
static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq)
{
struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
atomic_set(&cmdq->owner_prod, 0);
@@ -2942,7 +2982,7 @@ static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
if (ret)
return ret;
ret = arm_smmu_cmdq_init(smmu);
ret = arm_smmu_cmdq_init(smmu, &smmu->cmdq);
if (ret)
return ret;
@@ -3088,7 +3128,14 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
if (ret)
return ret;
return arm_smmu_init_strtab(smmu);
ret = arm_smmu_init_strtab(smmu);
if (ret)
return ret;
if (smmu->impl_ops && smmu->impl_ops->init_structures)
return smmu->impl_ops->init_structures(smmu);
return 0;
}
static int arm_smmu_write_reg_sync(struct arm_smmu_device *smmu, u32 val,
@@ -3149,6 +3196,15 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
int ret, nvec = ARM_SMMU_MAX_MSIS;
struct device *dev = smmu->dev;
/* Clear the MSI address regs */
writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
if (smmu->features & ARM_SMMU_FEAT_PRI)
writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
else
nvec--;
if (!(smmu->features & ARM_SMMU_FEAT_MSI))
return;
@@ -3157,9 +3213,6 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
return;
}
if (!(smmu->features & ARM_SMMU_FEAT_PRI))
nvec--;
/* Allocate MSIs for evtq, gerror and priq. Ignore cmdq */
ret = platform_msi_domain_alloc_irqs(dev, nvec, arm_smmu_write_msi_msg);
if (ret) {
@@ -3221,9 +3274,9 @@ static void arm_smmu_setup_unique_irqs(struct arm_smmu_device *smmu)
}
}
static int arm_smmu_reset_irqs(struct arm_smmu_device *smmu)
static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
{
int ret;
int ret, irq;
u32 irqen_flags = IRQ_CTRL_EVTQ_IRQEN | IRQ_CTRL_GERROR_IRQEN;
/* Disable IRQs first */
@@ -3234,35 +3287,7 @@ static int arm_smmu_reset_irqs(struct arm_smmu_device *smmu)
return ret;
}
if (!smmu->combined_irq) {
/*
* Clear the MSI address regs. These registers will be reset
* in arm_smmu_write_msi_msg callback function by irq_domain
* upon a new MSI message.
*/
writeq_relaxed(0, smmu->base + ARM_SMMU_GERROR_IRQ_CFG0);
writeq_relaxed(0, smmu->base + ARM_SMMU_EVTQ_IRQ_CFG0);
if (smmu->features & ARM_SMMU_FEAT_PRI)
writeq_relaxed(0, smmu->base + ARM_SMMU_PRIQ_IRQ_CFG0);
}
if (smmu->features & ARM_SMMU_FEAT_PRI)
irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
/* Enable interrupt generation on the SMMU */
ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
if (ret)
dev_warn(smmu->dev, "failed to enable irqs\n");
return ret;
}
static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
{
int ret = 0, irq = smmu->combined_irq;
irq = smmu->combined_irq;
if (irq) {
/*
* Cavium ThunderX2 implementation doesn't support unique irq
@@ -3278,7 +3303,16 @@ static int arm_smmu_setup_irqs(struct arm_smmu_device *smmu)
} else
arm_smmu_setup_unique_irqs(smmu);
return ret;
if (smmu->features & ARM_SMMU_FEAT_PRI)
irqen_flags |= IRQ_CTRL_PRIQ_IRQEN;
/* Enable interrupt generation on the SMMU */
ret = arm_smmu_write_reg_sync(smmu, irqen_flags,
ARM_SMMU_IRQ_CTRL, ARM_SMMU_IRQ_CTRLACK);
if (ret)
dev_warn(smmu->dev, "failed to enable irqs\n");
return 0;
}
static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
@@ -3292,7 +3326,7 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
return ret;
}
static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
{
int ret;
u32 reg, enables;
@@ -3400,17 +3434,11 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
}
}
ret = arm_smmu_reset_irqs(smmu);
if (ret) {
dev_err(smmu->dev, "failed to reset irqs\n");
return ret;
}
if (is_kdump_kernel())
enables &= ~(CR0_EVTQEN | CR0_PRIQEN);
/* Enable the SMMU interface, or ensure bypass */
if (!smmu->bypass || disable_bypass) {
if (!bypass || disable_bypass) {
enables |= CR0_SMMUEN;
} else {
ret = arm_smmu_update_gbpa(smmu, 0, GBPA_ABORT);
@@ -3424,6 +3452,14 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu)
return ret;
}
if (smmu->impl_ops && smmu->impl_ops->device_reset) {
ret = smmu->impl_ops->device_reset(smmu);
if (ret) {
dev_err(smmu->dev, "failed to reset impl\n");
return ret;
}
}
return 0;
}
@@ -3685,19 +3721,84 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
return 0;
}
#ifdef CONFIG_ACPI
static void acpi_smmu_get_options(u32 model, struct arm_smmu_device *smmu)
#ifdef CONFIG_TEGRA241_CMDQV
static void tegra_cmdqv_dt_probe(struct device_node *smmu_node,
struct arm_smmu_device *smmu)
{
switch (model) {
struct platform_device *pdev;
struct device_node *np;
np = of_parse_phandle(smmu_node, "nvidia,cmdqv", 0);
if (!np)
return;
pdev = of_find_device_by_node(np);
of_node_put(np);
if (!pdev)
return;
smmu->impl_dev = &pdev->dev;
smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
dev_info(smmu->dev, "found companion CMDQV device: %s\n",
dev_name(smmu->impl_dev));
}
#else
static void tegra_cmdqv_dt_probe(struct device_node *smmu_node,
struct arm_smmu_device *smmu)
{
}
#endif
#ifdef CONFIG_ACPI
#ifdef CONFIG_TEGRA241_CMDQV
static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
struct arm_smmu_device *smmu)
{
const char *uid = kasprintf(GFP_KERNEL, "%u", node->identifier);
struct acpi_device *adev;
/* Look for an NVDA200C node whose _UID matches the SMMU node ID */
adev = acpi_dev_get_first_match_dev("NVDA200C", uid, -1);
if (adev) {
/* Tegra241 CMDQV driver is responsible for put_device() */
smmu->impl_dev = &adev->dev;
smmu->options |= ARM_SMMU_OPT_TEGRA241_CMDQV;
dev_info(smmu->dev, "found companion CMDQV device: %s\n",
dev_name(smmu->impl_dev));
}
kfree(uid);
}
#else
static void acpi_smmu_dsdt_probe_tegra241_cmdqv(struct acpi_iort_node *node,
struct arm_smmu_device *smmu)
{
}
#endif
static int acpi_smmu_iort_probe_model(struct acpi_iort_node *node,
struct arm_smmu_device *smmu)
{
struct acpi_iort_smmu_v3 *iort_smmu =
(struct acpi_iort_smmu_v3 *)node->node_data;
switch (iort_smmu->model) {
case ACPI_IORT_SMMU_V3_CAVIUM_CN99XX:
smmu->options |= ARM_SMMU_OPT_PAGE0_REGS_ONLY;
break;
case ACPI_IORT_SMMU_V3_HISILICON_HI161X:
smmu->options |= ARM_SMMU_OPT_SKIP_PREFETCH;
break;
case ACPI_IORT_SMMU_V3_GENERIC:
/*
* Tegra241 implementation stores its SMMU options and impl_dev
* in DSDT. Thus, go through the ACPI tables unconditionally.
*/
acpi_smmu_dsdt_probe_tegra241_cmdqv(node, smmu);
break;
}
dev_notice(smmu->dev, "option mask 0x%x\n", smmu->options);
return 0;
}
static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
@@ -3712,12 +3813,10 @@ static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
/* Retrieve SMMUv3 specific data */
iort_smmu = (struct acpi_iort_smmu_v3 *)node->node_data;
acpi_smmu_get_options(iort_smmu->model, smmu);
if (iort_smmu->flags & ACPI_IORT_SMMU_V3_COHACC_OVERRIDE)
smmu->features |= ARM_SMMU_FEAT_COHERENCY;
return 0;
return acpi_smmu_iort_probe_model(node, smmu);
}
#else
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
@@ -3734,6 +3833,9 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev,
u32 cells;
int ret = -EINVAL;
if (!of_reserved_mem_device_init(dev))
dev_info(dev, "using device-specific reserved memory\n");
if (of_property_read_u32(dev->of_node, "#iommu-cells", &cells))
dev_err(dev, "missing #iommu-cells property\n");
else if (cells != 1)
@@ -3746,6 +3848,8 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev,
if (of_dma_is_coherent(dev->of_node))
smmu->features |= ARM_SMMU_FEAT_COHERENCY;
tegra_cmdqv_dt_probe(dev->of_node, smmu);
return ret;
}
@@ -3795,6 +3899,39 @@ static void arm_smmu_rmr_install_bypass_ste(struct arm_smmu_device *smmu)
iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
}
static void arm_smmu_impl_remove(void *data)
{
struct arm_smmu_device *smmu = data;
if (smmu->impl_ops && smmu->impl_ops->device_remove)
smmu->impl_ops->device_remove(smmu);
}
/*
* Probe all the compiled in implementations. Each one checks to see if it
* matches this HW and if so returns a devm_krealloc'd arm_smmu_device which
* replaces the callers. Otherwise the original is returned or ERR_PTR.
*/
static struct arm_smmu_device *arm_smmu_impl_probe(struct arm_smmu_device *smmu)
{
struct arm_smmu_device *new_smmu = ERR_PTR(-ENODEV);
int ret;
if (smmu->impl_dev && (smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
new_smmu = tegra241_cmdqv_probe(smmu);
if (new_smmu == ERR_PTR(-ENODEV))
return smmu;
if (IS_ERR(new_smmu))
return new_smmu;
ret = devm_add_action_or_reset(new_smmu->dev, arm_smmu_impl_remove,
new_smmu);
if (ret)
return ERR_PTR(ret);
return new_smmu;
}
static int arm_smmu_device_probe(struct platform_device *pdev)
{
int irq, ret;
@@ -3815,10 +3952,13 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
if (ret == -ENODEV)
return ret;
}
/* Set bypass mode according to firmware probing result */
smmu->bypass = !!ret;
smmu = arm_smmu_impl_probe(smmu);
if (IS_ERR(smmu))
return PTR_ERR(smmu);
/* Base address */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res)
@@ -3880,15 +4020,17 @@ static int arm_smmu_device_probe(struct platform_device *pdev)
/* Check for RMRs and install bypass STEs if any */
arm_smmu_rmr_install_bypass_ste(smmu);
ret = arm_smmu_setup_irqs(smmu);
if (ret)
return ret;
/* Reset the device */
ret = arm_smmu_device_reset(smmu);
ret = arm_smmu_device_reset(smmu, smmu->bypass);
if (ret)
goto err_disable;
ret = arm_smmu_setup_irqs(smmu);
if (ret) {
dev_err(smmu->dev, "failed to setup irqs\n");
return ret;
}
/* And we're up. Go go go! */
ret = iommu_device_sysfs_add(&smmu->iommu, dev, NULL,
"smmu3.%pa", &ioaddr);
@@ -3946,11 +4088,32 @@ static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
struct arm_smmu_device *smmu = dev_get_drvdata(dev);
return arm_smmu_device_reset(smmu);
dev_dbg(dev, "Resuming\n");
arm_smmu_device_reset(smmu, smmu->bypass);
return 0;
}
static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
struct arm_smmu_device *smmu = dev_get_drvdata(dev);
struct arm_smmu_cmdq_ent cmd;
cmd.opcode = CMDQ_OP_CFGI_ALL;
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL;
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
dev_dbg(dev, "Disabling\n");
arm_smmu_device_disable(smmu);
dev_dbg(dev, "Suspending\n");
return 0;
}
static const struct dev_pm_ops arm_smmu_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(NULL, arm_smmu_runtime_resume)
SET_LATE_SYSTEM_SLEEP_PM_OPS(arm_smmu_runtime_suspend, arm_smmu_runtime_resume)
};
static struct platform_driver arm_smmu_driver = {

View File

@@ -14,6 +14,8 @@
#include <linux/mmzone.h>
#include <linux/sizes.h>
struct arm_smmu_device;
/* MMIO registers */
#define ARM_SMMU_IDR0 0x0
#define IDR0_ST_LVL GENMASK(28, 27)
@@ -555,10 +557,18 @@ struct arm_smmu_cmdq {
atomic_long_t *valid_map;
atomic_t owner_prod;
atomic_t lock;
bool (*supports_cmd)(struct arm_smmu_cmdq_ent *ent);
};
static inline bool arm_smmu_cmdq_supports_cmd(struct arm_smmu_cmdq *cmdq,
struct arm_smmu_cmdq_ent *ent)
{
return cmdq->supports_cmd ? cmdq->supports_cmd(ent) : true;
}
struct arm_smmu_cmdq_batch {
u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
struct arm_smmu_cmdq *cmdq;
int num;
};
@@ -623,9 +633,20 @@ struct arm_smmu_strtab_cfg {
u32 strtab_base_cfg;
};
struct arm_smmu_impl_ops {
int (*device_reset)(struct arm_smmu_device *smmu);
void (*device_remove)(struct arm_smmu_device *smmu);
int (*init_structures)(struct arm_smmu_device *smmu);
struct arm_smmu_cmdq *(*get_secondary_cmdq)(
struct arm_smmu_device *smmu, struct arm_smmu_cmdq_ent *ent);
};
/* An SMMUv3 instance */
struct arm_smmu_device {
struct device *dev;
struct device *impl_dev;
const struct arm_smmu_impl_ops *impl_ops;
void __iomem *base;
void __iomem *page1;
@@ -655,6 +676,7 @@ struct arm_smmu_device {
#define ARM_SMMU_OPT_PAGE0_REGS_ONLY (1 << 1)
#define ARM_SMMU_OPT_MSIPOLL (1 << 2)
#define ARM_SMMU_OPT_CMDQ_FORCE_SYNC (1 << 3)
#define ARM_SMMU_OPT_TEGRA241_CMDQV (1 << 4)
u32 options;
struct arm_smmu_cmdq cmdq;
@@ -686,7 +708,7 @@ struct arm_smmu_device {
struct rb_root streams;
struct mutex streams_mutex;
bool bypass;
bool bypass;
};
struct arm_smmu_stream {
@@ -760,6 +782,15 @@ bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd);
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
unsigned long iova, size_t size);
void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq);
int arm_smmu_init_one_queue(struct arm_smmu_device *smmu,
struct arm_smmu_queue *q, void __iomem *page,
unsigned long prod_off, unsigned long cons_off,
size_t dwords, const char *name);
int arm_smmu_cmdq_init(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq *cmdq);
#ifdef CONFIG_ARM_SMMU_V3_SVA
bool arm_smmu_sva_supported(struct arm_smmu_device *smmu);
bool arm_smmu_master_sva_supported(struct arm_smmu_master *master);
@@ -815,4 +846,14 @@ static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
{
}
#endif /* CONFIG_ARM_SMMU_V3_SVA */
#ifdef CONFIG_TEGRA241_CMDQV
struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu);
#else /* CONFIG_TEGRA241_CMDQV */
static inline struct arm_smmu_device *
tegra241_cmdqv_probe(struct arm_smmu_device *smmu)
{
return ERR_PTR(-ENODEV);
}
#endif /* CONFIG_TEGRA241_CMDQV */
#endif /* _ARM_SMMU_V3_H */

View File

@@ -0,0 +1,941 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (C) 2021-2024 NVIDIA CORPORATION & AFFILIATES. */
#define dev_fmt(fmt) "tegra241_cmdqv: " fmt
#include <linux/acpi.h>
#include <linux/debugfs.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <acpi/acpixf.h>
#include "arm-smmu-v3.h"
/* CMDQV register page base and size defines */
#define TEGRA241_CMDQV_CONFIG_BASE (0)
#define TEGRA241_CMDQV_CONFIG_SIZE (SZ_64K)
#define TEGRA241_VCMDQ_PAGE0_BASE (TEGRA241_CMDQV_CONFIG_BASE + SZ_64K)
#define TEGRA241_VCMDQ_PAGE1_BASE (TEGRA241_VCMDQ_PAGE0_BASE + SZ_64K)
#define TEGRA241_VINTF_PAGE_BASE (TEGRA241_VCMDQ_PAGE1_BASE + SZ_64K)
/* CMDQV global base regs */
#define TEGRA241_CMDQV_CONFIG 0x0000
#define CMDQV_EN BIT(0)
#define TEGRA241_CMDQV_PARAM 0x0004
#define CMDQV_NUM_VINTF_LOG2 GENMASK(11, 8)
#define CMDQV_NUM_VCMDQ_LOG2 GENMASK(7, 4)
#define TEGRA241_CMDQV_STATUS 0x0008
#define CMDQV_ENABLED BIT(0)
#define TEGRA241_CMDQV_VINTF_ERR_MAP 0x0014
#define TEGRA241_CMDQV_VINTF_INT_MASK 0x001C
#define TEGRA241_CMDQV_CMDQ_ERR_MAP(m) (0x0024 + 0x4*(m))
#define TEGRA241_CMDQV_CMDQ_ALLOC(q) (0x0200 + 0x4*(q))
#define CMDQV_CMDQ_ALLOC_VINTF GENMASK(20, 15)
#define CMDQV_CMDQ_ALLOC_LVCMDQ GENMASK(7, 1)
#define CMDQV_CMDQ_ALLOCATED BIT(0)
/* VINTF base regs */
#define TEGRA241_VINTF(v) (0x1000 + 0x100*(v))
#define TEGRA241_VINTF_CONFIG 0x0000
#define VINTF_HYP_OWN BIT(17)
#define VINTF_VMID GENMASK(16, 1)
#define VINTF_EN BIT(0)
#define TEGRA241_VINTF_STATUS 0x0004
#define VINTF_STATUS GENMASK(3, 1)
#define VINTF_ENABLED BIT(0)
#define TEGRA241_VINTF_LVCMDQ_ERR_MAP_64(m) \
(0x00C0 + 0x8*(m))
#define LVCMDQ_ERR_MAP_NUM_64 2
/* VCMDQ base regs */
/* -- PAGE0 -- */
#define TEGRA241_VCMDQ_PAGE0(q) (TEGRA241_VCMDQ_PAGE0_BASE + 0x80*(q))
#define TEGRA241_VCMDQ_CONS 0x00000
#define VCMDQ_CONS_ERR GENMASK(30, 24)
#define TEGRA241_VCMDQ_PROD 0x00004
#define TEGRA241_VCMDQ_CONFIG 0x00008
#define VCMDQ_EN BIT(0)
#define TEGRA241_VCMDQ_STATUS 0x0000C
#define VCMDQ_ENABLED BIT(0)
#define TEGRA241_VCMDQ_GERROR 0x00010
#define TEGRA241_VCMDQ_GERRORN 0x00014
/* -- PAGE1 -- */
#define TEGRA241_VCMDQ_PAGE1(q) (TEGRA241_VCMDQ_PAGE1_BASE + 0x80*(q))
#define VCMDQ_ADDR GENMASK(47, 5)
#define VCMDQ_LOG2SIZE GENMASK(4, 0)
#define TEGRA241_VCMDQ_BASE 0x00000
#define TEGRA241_VCMDQ_BASE_H 0x00004
#define TEGRA241_VCMDQ_CONS_INDX_BASE 0x00008
#define TEGRA241_VCMDQ_CONS_INDX_BASE_H 0x0000C
/* VINTF logical-VCMDQ pages */
#define TEGRA241_VINTFi_PAGE0(i) (TEGRA241_VINTF_PAGE_BASE + SZ_128K*(i))
#define TEGRA241_VINTFi_PAGE1(i) (TEGRA241_VINTFi_PAGE0(i) + SZ_64K)
#define TEGRA241_VINTFi_LVCMDQ_PAGE0(i, q) \
(TEGRA241_VINTFi_PAGE0(i) + 0x80*(q))
#define TEGRA241_VINTFi_LVCMDQ_PAGE1(i, q) \
(TEGRA241_VINTFi_PAGE1(i) + 0x80*(q))
/* MMIO helpers */
#define REG_CMDQV(_cmdqv, _regname) \
((_cmdqv)->base + TEGRA241_CMDQV_##_regname)
#define REG_VINTF(_vintf, _regname) \
((_vintf)->base + TEGRA241_VINTF_##_regname)
#define REG_VCMDQ_PAGE0(_vcmdq, _regname) \
((_vcmdq)->page0 + TEGRA241_VCMDQ_##_regname)
#define REG_VCMDQ_PAGE1(_vcmdq, _regname) \
((_vcmdq)->page1 + TEGRA241_VCMDQ_##_regname)
static bool disable_cmdqv;
module_param(disable_cmdqv, bool, 0444);
MODULE_PARM_DESC(disable_cmdqv,
"This allows to disable CMDQV HW and use default SMMU internal CMDQ.");
static bool bypass_vcmdq;
module_param(bypass_vcmdq, bool, 0444);
MODULE_PARM_DESC(bypass_vcmdq,
"This allows to bypass VCMDQ for debugging use or perf comparison.");
/**
* struct tegra241_vcmdq - Virtual Command Queue
* @idx: Global index in the CMDQV
* @lidx: Local index in the VINTF
* @enabled: Enable status
* @cmdqv: Parent CMDQV pointer
* @vintf: Parent VINTF pointer
* @cmdq: Command Queue struct
* @page0: MMIO Page0 base address
* @page1: MMIO Page1 base address
*/
struct tegra241_vcmdq {
u16 idx;
u16 lidx;
bool enabled;
struct tegra241_cmdqv *cmdqv;
struct tegra241_vintf *vintf;
struct arm_smmu_cmdq cmdq;
void __iomem *page0;
void __iomem *page1;
};
/**
* struct tegra241_vintf - Virtual Interface
* @idx: Global index in the CMDQV
* @enabled: Enable status
* @hyp_own: Owned by hypervisor (in-kernel)
* @cmdqv: Parent CMDQV pointer
* @lvcmdqs: List of logical VCMDQ pointers
* @base: MMIO base address
*/
struct tegra241_vintf {
u16 idx;
bool enabled;
bool hyp_own;
struct tegra241_cmdqv *cmdqv;
struct tegra241_vcmdq **lvcmdqs;
void __iomem *base;
};
/**
* struct tegra241_cmdqv - CMDQ-V for SMMUv3
* @smmu: SMMUv3 device
* @dev: CMDQV device
* @base: MMIO base address
* @irq: IRQ number
* @num_vintfs: Total number of VINTFs
* @num_vcmdqs: Total number of VCMDQs
* @num_lvcmdqs_per_vintf: Number of logical VCMDQs per VINTF
* @vintf_ids: VINTF id allocator
* @vintfs: List of VINTFs
*/
struct tegra241_cmdqv {
struct arm_smmu_device smmu;
struct device *dev;
void __iomem *base;
int irq;
/* CMDQV Hardware Params */
u16 num_vintfs;
u16 num_vcmdqs;
u16 num_lvcmdqs_per_vintf;
struct ida vintf_ids;
struct tegra241_vintf **vintfs;
};
/* Config and Polling Helpers */
static inline int tegra241_cmdqv_write_config(struct tegra241_cmdqv *cmdqv,
void __iomem *addr_config,
void __iomem *addr_status,
u32 regval, const char *header,
bool *out_enabled)
{
bool en = regval & BIT(0);
int ret;
writel(regval, addr_config);
ret = readl_poll_timeout(addr_status, regval,
en ? regval & BIT(0) : !(regval & BIT(0)),
1, ARM_SMMU_POLL_TIMEOUT_US);
if (ret)
dev_err(cmdqv->dev, "%sfailed to %sable, STATUS=0x%08X\n",
header, en ? "en" : "dis", regval);
if (out_enabled)
WRITE_ONCE(*out_enabled, regval & BIT(0));
return ret;
}
static inline int cmdqv_write_config(struct tegra241_cmdqv *cmdqv, u32 regval)
{
return tegra241_cmdqv_write_config(cmdqv,
REG_CMDQV(cmdqv, CONFIG),
REG_CMDQV(cmdqv, STATUS),
regval, "CMDQV: ", NULL);
}
static inline int vintf_write_config(struct tegra241_vintf *vintf, u32 regval)
{
char header[16];
snprintf(header, 16, "VINTF%u: ", vintf->idx);
return tegra241_cmdqv_write_config(vintf->cmdqv,
REG_VINTF(vintf, CONFIG),
REG_VINTF(vintf, STATUS),
regval, header, &vintf->enabled);
}
static inline char *lvcmdq_error_header(struct tegra241_vcmdq *vcmdq,
char *header, int hlen)
{
WARN_ON(hlen < 64);
if (WARN_ON(!vcmdq->vintf))
return "";
snprintf(header, hlen, "VINTF%u: VCMDQ%u/LVCMDQ%u: ",
vcmdq->vintf->idx, vcmdq->idx, vcmdq->lidx);
return header;
}
static inline int vcmdq_write_config(struct tegra241_vcmdq *vcmdq, u32 regval)
{
char header[64], *h = lvcmdq_error_header(vcmdq, header, 64);
return tegra241_cmdqv_write_config(vcmdq->cmdqv,
REG_VCMDQ_PAGE0(vcmdq, CONFIG),
REG_VCMDQ_PAGE0(vcmdq, STATUS),
regval, h, &vcmdq->enabled);
}
/* ISR Functions */
static void tegra241_vintf0_handle_error(struct tegra241_vintf *vintf)
{
int i;
for (i = 0; i < LVCMDQ_ERR_MAP_NUM_64; i++) {
u64 map = readq_relaxed(REG_VINTF(vintf, LVCMDQ_ERR_MAP_64(i)));
while (map) {
unsigned long lidx = __ffs64(map);
struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx];
u32 gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR));
__arm_smmu_cmdq_skip_err(&vintf->cmdqv->smmu, &vcmdq->cmdq);
writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN));
map &= ~BIT_ULL(lidx);
}
}
}
static irqreturn_t tegra241_cmdqv_isr(int irq, void *devid)
{
struct tegra241_cmdqv *cmdqv = (struct tegra241_cmdqv *)devid;
void __iomem *reg_vintf_map = REG_CMDQV(cmdqv, VINTF_ERR_MAP);
char err_str[256];
u64 vintf_map;
/* Use readl_relaxed() as register addresses are not 64-bit aligned */
vintf_map = (u64)readl_relaxed(reg_vintf_map + 0x4) << 32 |
(u64)readl_relaxed(reg_vintf_map);
snprintf(err_str, sizeof(err_str),
"vintf_map: %016llx, vcmdq_map %08x:%08x:%08x:%08x", vintf_map,
readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(3))),
readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(2))),
readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(1))),
readl_relaxed(REG_CMDQV(cmdqv, CMDQ_ERR_MAP(0))));
dev_warn(cmdqv->dev, "unexpected error reported. %s\n", err_str);
/* Handle VINTF0 and its LVCMDQs */
if (vintf_map & BIT_ULL(0)) {
tegra241_vintf0_handle_error(cmdqv->vintfs[0]);
vintf_map &= ~BIT_ULL(0);
}
return IRQ_HANDLED;
}
/* Command Queue Function */
static bool tegra241_guest_vcmdq_supports_cmd(struct arm_smmu_cmdq_ent *ent)
{
switch (ent->opcode) {
case CMDQ_OP_TLBI_NH_ASID:
case CMDQ_OP_TLBI_NH_VA:
case CMDQ_OP_ATC_INV:
return true;
default:
return false;
}
}
static struct arm_smmu_cmdq *
tegra241_cmdqv_get_cmdq(struct arm_smmu_device *smmu,
struct arm_smmu_cmdq_ent *ent)
{
struct tegra241_cmdqv *cmdqv =
container_of(smmu, struct tegra241_cmdqv, smmu);
struct tegra241_vintf *vintf = cmdqv->vintfs[0];
struct tegra241_vcmdq *vcmdq;
u16 lidx;
if (READ_ONCE(bypass_vcmdq))
return NULL;
/* Use SMMU CMDQ if VINTF0 is uninitialized */
if (!READ_ONCE(vintf->enabled))
return NULL;
/*
* Select a LVCMDQ to use. Here we use a temporal solution to
* balance out traffic on cmdq issuing: each cmdq has its own
* lock, if all cpus issue cmdlist using the same cmdq, only
* one CPU at a time can enter the process, while the others
* will be spinning at the same lock.
*/
lidx = raw_smp_processor_id() % cmdqv->num_lvcmdqs_per_vintf;
vcmdq = vintf->lvcmdqs[lidx];
if (!vcmdq || !READ_ONCE(vcmdq->enabled))
return NULL;
/* Unsupported CMD goes for smmu->cmdq pathway */
if (!arm_smmu_cmdq_supports_cmd(&vcmdq->cmdq, ent))
return NULL;
return &vcmdq->cmdq;
}
/* HW Reset Functions */
static void tegra241_vcmdq_hw_deinit(struct tegra241_vcmdq *vcmdq)
{
char header[64], *h = lvcmdq_error_header(vcmdq, header, 64);
u32 gerrorn, gerror;
if (vcmdq_write_config(vcmdq, 0)) {
dev_err(vcmdq->cmdqv->dev,
"%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h,
readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)),
readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)),
readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS)));
}
writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, PROD));
writel_relaxed(0, REG_VCMDQ_PAGE0(vcmdq, CONS));
writel_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE_H));
writel_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, BASE));
writel_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, CONS_INDX_BASE_H));
writel_relaxed(0, REG_VCMDQ_PAGE1(vcmdq, CONS_INDX_BASE));
gerrorn = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN));
gerror = readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR));
if (gerror != gerrorn) {
dev_warn(vcmdq->cmdqv->dev,
"%suncleared error detected, resetting\n", h);
writel(gerror, REG_VCMDQ_PAGE0(vcmdq, GERRORN));
}
dev_dbg(vcmdq->cmdqv->dev, "%sdeinited\n", h);
}
static int tegra241_vcmdq_hw_init(struct tegra241_vcmdq *vcmdq)
{
char header[64], *h = lvcmdq_error_header(vcmdq, header, 64);
int ret;
/* Reset VCMDQ */
tegra241_vcmdq_hw_deinit(vcmdq);
/* Configure and enable VCMDQ */
writel_relaxed(upper_32_bits(vcmdq->cmdq.q.q_base), REG_VCMDQ_PAGE1(vcmdq, BASE_H));
writel_relaxed(lower_32_bits(vcmdq->cmdq.q.q_base), REG_VCMDQ_PAGE1(vcmdq, BASE));
writel_relaxed(vcmdq->cmdq.q.llq.prod, REG_VCMDQ_PAGE0(vcmdq, PROD));
writel_relaxed(vcmdq->cmdq.q.llq.cons, REG_VCMDQ_PAGE0(vcmdq, CONS));
ret = vcmdq_write_config(vcmdq, VCMDQ_EN);
if (ret) {
dev_err(vcmdq->cmdqv->dev,
"%sGERRORN=0x%X, GERROR=0x%X, CONS=0x%X\n", h,
readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERRORN)),
readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, GERROR)),
readl_relaxed(REG_VCMDQ_PAGE0(vcmdq, CONS)));
return ret;
}
dev_dbg(vcmdq->cmdqv->dev, "%sinited\n", h);
return 0;
}
static void tegra241_vintf_hw_deinit(struct tegra241_vintf *vintf)
{
u16 lidx;
for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++)
if (vintf->lvcmdqs && vintf->lvcmdqs[lidx])
tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]);
vintf_write_config(vintf, 0);
}
static int tegra241_vintf_hw_init(struct tegra241_vintf *vintf, bool hyp_own)
{
u32 regval;
u16 lidx;
int ret;
/* Reset VINTF */
tegra241_vintf_hw_deinit(vintf);
/* Configure and enable VINTF */
/*
* Note that HYP_OWN bit is wired to zero when running in guest kernel,
* whether enabling it here or not, as !HYP_OWN cmdq HWs only support a
* restricted set of supported commands.
*/
regval = FIELD_PREP(VINTF_HYP_OWN, hyp_own);
writel(regval, REG_VINTF(vintf, CONFIG));
ret = vintf_write_config(vintf, regval | VINTF_EN);
if (ret)
return ret;
/*
* As being mentioned above, HYP_OWN bit is wired to zero for a guest
* kernel, so read it back from HW to ensure that reflects in hyp_own
*/
vintf->hyp_own = !!(VINTF_HYP_OWN & readl(REG_VINTF(vintf, CONFIG)));
for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++) {
if (vintf->lvcmdqs && vintf->lvcmdqs[lidx]) {
ret = tegra241_vcmdq_hw_init(vintf->lvcmdqs[lidx]);
if (ret) {
tegra241_vintf_hw_deinit(vintf);
return ret;
}
}
}
return 0;
}
static int tegra241_cmdqv_hw_reset(struct arm_smmu_device *smmu)
{
struct tegra241_cmdqv *cmdqv =
container_of(smmu, struct tegra241_cmdqv, smmu);
u16 qidx, lidx, idx;
u32 regval;
int ret;
/* Reset CMDQV */
regval = readl_relaxed(REG_CMDQV(cmdqv, CONFIG));
ret = cmdqv_write_config(cmdqv, regval & ~CMDQV_EN);
if (ret)
return ret;
ret = cmdqv_write_config(cmdqv, regval | CMDQV_EN);
if (ret)
return ret;
/* Assign preallocated global VCMDQs to each VINTF as LVCMDQs */
for (idx = 0, qidx = 0; idx < cmdqv->num_vintfs; idx++) {
for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) {
regval = FIELD_PREP(CMDQV_CMDQ_ALLOC_VINTF, idx);
regval |= FIELD_PREP(CMDQV_CMDQ_ALLOC_LVCMDQ, lidx);
regval |= CMDQV_CMDQ_ALLOCATED;
writel_relaxed(regval,
REG_CMDQV(cmdqv, CMDQ_ALLOC(qidx++)));
}
}
return tegra241_vintf_hw_init(cmdqv->vintfs[0], true);
}
/* VCMDQ Resource Helpers */
static int tegra241_vcmdq_alloc_smmu_cmdq(struct tegra241_vcmdq *vcmdq)
{
struct arm_smmu_device *smmu = &vcmdq->cmdqv->smmu;
struct arm_smmu_cmdq *cmdq = &vcmdq->cmdq;
struct arm_smmu_queue *q = &cmdq->q;
char name[16];
u32 regval;
int ret;
snprintf(name, 16, "vcmdq%u", vcmdq->idx);
/* Cap queue size to SMMU's IDR1.CMDQS and ensure natural alignment */
regval = readl_relaxed(smmu->base + ARM_SMMU_IDR1);
q->llq.max_n_shift =
min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, regval));
/* Use the common helper to init the VCMDQ, and then... */
ret = arm_smmu_init_one_queue(smmu, q, vcmdq->page0,
TEGRA241_VCMDQ_PROD, TEGRA241_VCMDQ_CONS,
CMDQ_ENT_DWORDS, name);
if (ret)
return ret;
/* ...override q_base to write VCMDQ_BASE registers */
q->q_base = q->base_dma & VCMDQ_ADDR;
q->q_base |= FIELD_PREP(VCMDQ_LOG2SIZE, q->llq.max_n_shift);
if (!vcmdq->vintf->hyp_own)
cmdq->supports_cmd = tegra241_guest_vcmdq_supports_cmd;
return arm_smmu_cmdq_init(smmu, cmdq);
}
/* VINTF Logical VCMDQ Resource Helpers */
static void tegra241_vintf_deinit_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
{
vintf->lvcmdqs[lidx] = NULL;
}
static int tegra241_vintf_init_lvcmdq(struct tegra241_vintf *vintf, u16 lidx,
struct tegra241_vcmdq *vcmdq)
{
struct tegra241_cmdqv *cmdqv = vintf->cmdqv;
u16 idx = vintf->idx;
vcmdq->idx = idx * cmdqv->num_lvcmdqs_per_vintf + lidx;
vcmdq->lidx = lidx;
vcmdq->cmdqv = cmdqv;
vcmdq->vintf = vintf;
vcmdq->page0 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE0(idx, lidx);
vcmdq->page1 = cmdqv->base + TEGRA241_VINTFi_LVCMDQ_PAGE1(idx, lidx);
vintf->lvcmdqs[lidx] = vcmdq;
return 0;
}
static void tegra241_vintf_free_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
{
struct tegra241_vcmdq *vcmdq = vintf->lvcmdqs[lidx];
char header[64];
/* Note that the lvcmdq queue memory space is managed by devres */
tegra241_vintf_deinit_lvcmdq(vintf, lidx);
dev_dbg(vintf->cmdqv->dev,
"%sdeallocated\n", lvcmdq_error_header(vcmdq, header, 64));
kfree(vcmdq);
}
static struct tegra241_vcmdq *
tegra241_vintf_alloc_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
{
struct tegra241_cmdqv *cmdqv = vintf->cmdqv;
struct tegra241_vcmdq *vcmdq;
char header[64];
int ret;
vcmdq = kzalloc(sizeof(*vcmdq), GFP_KERNEL);
if (!vcmdq)
return ERR_PTR(-ENOMEM);
ret = tegra241_vintf_init_lvcmdq(vintf, lidx, vcmdq);
if (ret)
goto free_vcmdq;
/* Build an arm_smmu_cmdq for each LVCMDQ */
ret = tegra241_vcmdq_alloc_smmu_cmdq(vcmdq);
if (ret)
goto deinit_lvcmdq;
dev_dbg(cmdqv->dev,
"%sallocated\n", lvcmdq_error_header(vcmdq, header, 64));
return vcmdq;
deinit_lvcmdq:
tegra241_vintf_deinit_lvcmdq(vintf, lidx);
free_vcmdq:
kfree(vcmdq);
return ERR_PTR(ret);
}
/* VINTF Resource Helpers */
static void tegra241_cmdqv_deinit_vintf(struct tegra241_cmdqv *cmdqv, u16 idx)
{
kfree(cmdqv->vintfs[idx]->lvcmdqs);
ida_free(&cmdqv->vintf_ids, idx);
cmdqv->vintfs[idx] = NULL;
}
static int tegra241_cmdqv_init_vintf(struct tegra241_cmdqv *cmdqv, u16 max_idx,
struct tegra241_vintf *vintf)
{
u16 idx;
int ret;
ret = ida_alloc_max(&cmdqv->vintf_ids, max_idx, GFP_KERNEL);
if (ret < 0)
return ret;
idx = ret;
vintf->idx = idx;
vintf->cmdqv = cmdqv;
vintf->base = cmdqv->base + TEGRA241_VINTF(idx);
vintf->lvcmdqs = kcalloc(cmdqv->num_lvcmdqs_per_vintf,
sizeof(*vintf->lvcmdqs), GFP_KERNEL);
if (!vintf->lvcmdqs) {
ida_free(&cmdqv->vintf_ids, idx);
return -ENOMEM;
}
cmdqv->vintfs[idx] = vintf;
return ret;
}
/* Remove Helpers */
static void tegra241_vintf_remove_lvcmdq(struct tegra241_vintf *vintf, u16 lidx)
{
tegra241_vcmdq_hw_deinit(vintf->lvcmdqs[lidx]);
tegra241_vintf_free_lvcmdq(vintf, lidx);
}
static void tegra241_cmdqv_remove_vintf(struct tegra241_cmdqv *cmdqv, u16 idx)
{
struct tegra241_vintf *vintf = cmdqv->vintfs[idx];
u16 lidx;
/* Remove LVCMDQ resources */
for (lidx = 0; lidx < vintf->cmdqv->num_lvcmdqs_per_vintf; lidx++)
if (vintf->lvcmdqs[lidx])
tegra241_vintf_remove_lvcmdq(vintf, lidx);
/* Remove VINTF resources */
tegra241_vintf_hw_deinit(vintf);
dev_dbg(cmdqv->dev, "VINTF%u: deallocated\n", vintf->idx);
tegra241_cmdqv_deinit_vintf(cmdqv, idx);
kfree(vintf);
}
static void tegra241_cmdqv_remove(struct arm_smmu_device *smmu)
{
struct tegra241_cmdqv *cmdqv =
container_of(smmu, struct tegra241_cmdqv, smmu);
u16 idx;
/* Remove VINTF resources */
for (idx = 0; idx < cmdqv->num_vintfs; idx++) {
if (cmdqv->vintfs[idx]) {
/* Only vintf0 should remain at this stage */
WARN_ON(idx > 0);
tegra241_cmdqv_remove_vintf(cmdqv, idx);
}
}
/* Remove cmdqv resources */
ida_destroy(&cmdqv->vintf_ids);
if (cmdqv->irq > 0)
free_irq(cmdqv->irq, cmdqv);
iounmap(cmdqv->base);
kfree(cmdqv->vintfs);
put_device(cmdqv->dev); /* smmu->impl_dev */
}
static struct arm_smmu_impl_ops tegra241_cmdqv_impl_ops = {
.get_secondary_cmdq = tegra241_cmdqv_get_cmdq,
.device_reset = tegra241_cmdqv_hw_reset,
.device_remove = tegra241_cmdqv_remove,
};
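/*
 * Note: this is the full-featured ops table. __tegra241_cmdqv_probe() below
 * first installs a reduced init_ops set; tegra241_cmdqv_init_structures()
 * swaps in the table above only after VINTF0's LVCMDQs are preallocated.
 */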
/* Probe Functions */
static int tegra241_cmdqv_acpi_is_memory(struct acpi_resource *res, void *data)
{
struct resource_win win;
return !acpi_dev_resource_address_space(res, &win);
}
static int tegra241_cmdqv_acpi_get_irqs(struct acpi_resource *ares, void *data)
{
struct resource r;
int *irq = data;
if (*irq <= 0 && acpi_dev_resource_interrupt(ares, 0, &r))
*irq = r.start;
return 1; /* No need to add resource to the list */
}
static struct resource *
tegra241_cmdqv_find_acpi_resource(struct device *dev, int *irq)
{
struct acpi_device *adev = to_acpi_device(dev);
struct list_head resource_list;
struct resource_entry *rentry;
struct resource *res = NULL;
int ret;
INIT_LIST_HEAD(&resource_list);
ret = acpi_dev_get_resources(adev, &resource_list,
tegra241_cmdqv_acpi_is_memory, NULL);
if (ret < 0) {
dev_err(dev, "failed to get memory resource: %d\n", ret);
return NULL;
}
rentry = list_first_entry_or_null(&resource_list,
struct resource_entry, node);
if (!rentry) {
dev_err(dev, "failed to get memory resource entry\n");
goto free_list;
}
/* Caller must free the res */
res = kzalloc(sizeof(*res), GFP_KERNEL);
if (!res)
goto free_list;
*res = *rentry->res;
acpi_dev_free_resource_list(&resource_list);
INIT_LIST_HEAD(&resource_list);
if (irq)
ret = acpi_dev_get_resources(adev, &resource_list,
tegra241_cmdqv_acpi_get_irqs, irq);
if (ret < 0 || !irq || *irq <= 0)
dev_warn(dev, "no interrupt. errors will not be reported\n");
free_list:
acpi_dev_free_resource_list(&resource_list);
return res;
}
static struct resource *
tegra241_cmdqv_find_dt_resource(struct device *dev, int *irq)
{
struct platform_device *pdev = to_platform_device(dev);
struct resource *res;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) {
dev_err(dev, "no memory resource found for CMDQV\n");
return NULL;
}
if (irq)
*irq = platform_get_irq_byname_optional(pdev, "cmdqv");
if (!irq || *irq <= 0)
dev_warn(dev, "no interrupt. errors will not be reported\n");
return res;
}
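/*
 * Unlike the ACPI path above, which returns a kzalloc()'d copy, this hands
 * back the platform device's own resource, so it must not be kfree()'d --
 * hence the !of_node guard around kfree(res) in tegra241_cmdqv_probe().
 */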
static int tegra241_cmdqv_init_structures(struct arm_smmu_device *smmu)
{
struct tegra241_cmdqv *cmdqv =
container_of(smmu, struct tegra241_cmdqv, smmu);
struct tegra241_vintf *vintf;
int lidx;
int ret;
vintf = kzalloc(sizeof(*vintf), GFP_KERNEL);
if (!vintf)
return -ENOMEM;
/* Init VINTF0 for in-kernel use */
ret = tegra241_cmdqv_init_vintf(cmdqv, 0, vintf);
if (ret) {
dev_err(cmdqv->dev, "failed to init vintf0: %d\n", ret);
kfree(vintf); /* vintf was locally kzalloc()'d; don't leak it on failure */
return ret;
}
/* Preallocate logical VCMDQs to VINTF0 */
for (lidx = 0; lidx < cmdqv->num_lvcmdqs_per_vintf; lidx++) {
struct tegra241_vcmdq *vcmdq;
vcmdq = tegra241_vintf_alloc_lvcmdq(vintf, lidx);
if (IS_ERR(vcmdq))
return PTR_ERR(vcmdq);
}
/* Now, we are ready to run all the impl ops */
smmu->impl_ops = &tegra241_cmdqv_impl_ops;
return 0;
}
#ifdef CONFIG_IOMMU_DEBUGFS
static struct dentry *cmdqv_debugfs_dir;
#endif
static struct arm_smmu_device *
__tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res,
int irq)
{
static const struct arm_smmu_impl_ops init_ops = {
.init_structures = tegra241_cmdqv_init_structures,
.device_remove = tegra241_cmdqv_remove,
};
struct tegra241_cmdqv *cmdqv = NULL;
struct arm_smmu_device *new_smmu;
void __iomem *base;
u32 regval;
int ret;
static_assert(offsetof(struct tegra241_cmdqv, smmu) == 0);
base = ioremap(res->start, resource_size(res));
if (!base) {
dev_err(smmu->dev, "failed to ioremap\n");
return NULL;
}
regval = readl(base + TEGRA241_CMDQV_CONFIG);
if (disable_cmdqv) {
dev_info(smmu->dev, "Detected disable_cmdqv=true\n");
writel(regval & ~CMDQV_EN, base + TEGRA241_CMDQV_CONFIG);
goto iounmap;
}
cmdqv = devm_krealloc(smmu->dev, smmu, sizeof(*cmdqv), GFP_KERNEL);
if (!cmdqv)
goto iounmap;
new_smmu = &cmdqv->smmu;
cmdqv->irq = irq;
cmdqv->base = base;
cmdqv->dev = smmu->impl_dev;
if (cmdqv->irq > 0) {
ret = request_irq(irq, tegra241_cmdqv_isr, 0, "tegra241-cmdqv",
cmdqv);
if (ret) {
dev_err(cmdqv->dev, "failed to request irq (%d): %d\n",
cmdqv->irq, ret);
goto iounmap;
}
}
regval = readl_relaxed(REG_CMDQV(cmdqv, PARAM));
cmdqv->num_vintfs = 1 << FIELD_GET(CMDQV_NUM_VINTF_LOG2, regval);
cmdqv->num_vcmdqs = 1 << FIELD_GET(CMDQV_NUM_VCMDQ_LOG2, regval);
cmdqv->num_lvcmdqs_per_vintf = cmdqv->num_vcmdqs / cmdqv->num_vintfs;
cmdqv->vintfs =
kcalloc(cmdqv->num_vintfs, sizeof(*cmdqv->vintfs), GFP_KERNEL);
if (!cmdqv->vintfs)
goto free_irq;
ida_init(&cmdqv->vintf_ids);
#ifdef CONFIG_IOMMU_DEBUGFS
if (!cmdqv_debugfs_dir) {
cmdqv_debugfs_dir =
debugfs_create_dir("tegra241_cmdqv", iommu_debugfs_dir);
debugfs_create_bool("bypass_vcmdq", 0644, cmdqv_debugfs_dir,
&bypass_vcmdq);
}
#endif
/* Provide init-level ops only, until tegra241_cmdqv_init_structures */
new_smmu->impl_ops = &init_ops;
return new_smmu;
free_irq:
if (cmdqv->irq > 0)
free_irq(cmdqv->irq, cmdqv);
iounmap:
iounmap(base);
return NULL;
}
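/*
 * Design note: the static_assert above pins the smmu member at offset 0 of
 * struct tegra241_cmdqv, so the devm_krealloc() of the original smmu yields
 * memory that is valid as both types; callers continue to use the returned
 * &cmdqv->smmu as their arm_smmu_device handle.
 */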
struct arm_smmu_device *tegra241_cmdqv_probe(struct arm_smmu_device *smmu)
{
struct arm_smmu_device *new_smmu;
struct resource *res = NULL;
int irq = 0; /* tegra241_cmdqv_acpi_get_irqs() tests *irq before assigning it */
if (!smmu->dev->of_node)
res = tegra241_cmdqv_find_acpi_resource(smmu->impl_dev, &irq);
else
res = tegra241_cmdqv_find_dt_resource(smmu->impl_dev, &irq);
if (!res)
goto out_fallback;
new_smmu = __tegra241_cmdqv_probe(smmu, res, irq);
if (!smmu->dev->of_node)
kfree(res);
if (new_smmu)
return new_smmu;
out_fallback:
dev_info(smmu->impl_dev, "Falling back to standard SMMU CMDQ\n");
smmu->options &= ~ARM_SMMU_OPT_TEGRA241_CMDQV;
put_device(smmu->impl_dev);
return ERR_PTR(-ENODEV);
}
static const struct of_device_id tegra241_cmdqv_of_match[] = {
{ .compatible = "nvidia,tegra264-cmdqv" },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, tegra241_cmdqv_of_match);
static struct platform_driver tegra241_cmdqv_driver = {
.driver = {
.name = "tegra241-cmdqv",
.of_match_table = tegra241_cmdqv_of_match,
},
};
module_platform_driver(tegra241_cmdqv_driver);
MODULE_DESCRIPTION("NVIDIA Tegra241 Command Queue Virtualization Driver");
MODULE_LICENSE("GPL v2");
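/*
 * Hypothetical usage sketch (not part of this file): the arm-smmu-v3 core
 * glue is expected to consume tegra241_cmdqv_probe()'s contract roughly as
 * below. On success the devm_krealloc()'d handle replaces the original smmu
 * pointer; on ERR_PTR(-ENODEV) the probe has already cleared
 * ARM_SMMU_OPT_TEGRA241_CMDQV, so the caller keeps the standard CMDQ path.
 */
static inline struct arm_smmu_device *
example_try_cmdqv(struct arm_smmu_device *smmu)
{
struct arm_smmu_device *new_smmu;
if (!(smmu->options & ARM_SMMU_OPT_TEGRA241_CMDQV))
return smmu;
new_smmu = tegra241_cmdqv_probe(smmu);
return IS_ERR(new_smmu) ? smmu : new_smmu;
}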

View File

@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2016-2024, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2016-2025, NVIDIA CORPORATION. All rights reserved.
*/
#include <linux/delay.h>
@@ -835,7 +835,6 @@ static int tegra_hsp_request_shared_irq(struct tegra_hsp *hsp)
{
unsigned int i, irq = 0;
unsigned int default_si = hsp->num_si;
unsigned int value;
int err;
for (i = 0; i < hsp->num_si; i++) {
@@ -853,13 +852,6 @@ static int tegra_hsp_request_shared_irq(struct tegra_hsp *hsp)
hsp->shared_irqs[i].enabled = true;
value = tegra_hsp_readl(hsp, HSP_INT_IE(i));
if (value && !hsp->soc->virtualized) {
dev_warn(hsp->dev,
"disabling interrupts for si: %d\n", i);
tegra_hsp_writel(hsp, 0, HSP_INT_IE(i));
}
/* Use first available interrupt as default. */
if (default_si == hsp->num_si)
default_si = i;

View File

@@ -987,8 +987,6 @@ static int tegra_mc_probe(struct platform_device *pdev)
}
if (mc->soc->has_chiplet_arch) {
unsigned long intstat;
/* Unmask MCF interrupts */
mc_ch_writel(mc, MC_BROADCAST_CHANNEL, mc->soc->mcf_intmask, MCF_INTMASK_0);
mc_ch_writel(mc, MC_BROADCAST_CHANNEL, mc->soc->mcf_intmask, MCF_INTPRIORITY_0);

View File

@@ -1137,7 +1137,6 @@ static struct phy_driver realtek_drvs[] = {
.get_wol = &rtl8211f_get_wol,
.set_wol = &rtl8211f_set_wol,
.suspend = rtl821x_suspend,
.suspend = genphy_suspend,
.resume = rtl821x_resume,
.read_page = rtl821x_read_page,
.write_page = rtl821x_write_page,

View File

@@ -11,6 +11,12 @@
#include "arm_cspmu.h"
#define PMCNTENSET 0xC00
#define PMCNTENCLR 0xC20
#define PMCR 0xE04
#define PMCR_E BIT(0)
#define NV_PCIE_PORT_COUNT 10ULL
#define NV_PCIE_FILTER_ID_MASK GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
@@ -20,6 +26,16 @@
#define NV_CNVL_PORT_COUNT 4ULL
#define NV_CNVL_FILTER_ID_MASK GENMASK_ULL(NV_CNVL_PORT_COUNT - 1, 0)
#define NV_UCF_FILTER_ID_MASK GENMASK_ULL(4, 0)
#define NV_UPHY_FILTER_ID_MASK GENMASK_ULL(16, 0)
#define NV_VISION_FILTER_ID_MASK GENMASK_ULL(19, 0)
#define NV_DISPLAY_FILTER_ID_MASK BIT(0)
#define NV_UCF_GPU_FILTER_ID_MASK BIT(0)
#define NV_GENERIC_FILTER_ID_MASK GENMASK_ULL(31, 0)
#define NV_PRODID_MASK (ARM_CSPMU_PMIIDR_PRODUCTID | \
@@ -45,6 +61,7 @@ struct nv_cspmu_ctx {
u32 filter_default_val;
struct attribute **event_attr;
struct attribute **format_attr;
u32 *pmcnten;
};
static struct attribute *scf_pmu_event_attrs[] = {
@@ -178,6 +195,72 @@ static struct attribute *mcf_pmu_event_attrs[] = {
NULL,
};
static struct attribute *ucf_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(slc_allocate, 0xf0),
ARM_CSPMU_EVENT_ATTR(slc_refill, 0xf1),
ARM_CSPMU_EVENT_ATTR(slc_access, 0xf2),
ARM_CSPMU_EVENT_ATTR(slc_wb, 0xf3),
ARM_CSPMU_EVENT_ATTR(slc_hit, 0x118),
ARM_CSPMU_EVENT_ATTR(slc_access_wr, 0x112),
ARM_CSPMU_EVENT_ATTR(slc_access_rd, 0x111),
ARM_CSPMU_EVENT_ATTR(slc_refill_wr, 0x10a),
ARM_CSPMU_EVENT_ATTR(slc_refill_rd, 0x109),
ARM_CSPMU_EVENT_ATTR(slc_hit_wr, 0x11a),
ARM_CSPMU_EVENT_ATTR(slc_hit_rd, 0x119),
ARM_CSPMU_EVENT_ATTR(slc_access_dataless, 0x183),
ARM_CSPMU_EVENT_ATTR(slc_access_atomic, 0x184),
ARM_CSPMU_EVENT_ATTR(local_snoop, 0x180),
ARM_CSPMU_EVENT_ATTR(ext_snp_access, 0x181),
ARM_CSPMU_EVENT_ATTR(ext_snp_evict, 0x182),
ARM_CSPMU_EVENT_ATTR(ucf_bus_cycles, 0x1d),
ARM_CSPMU_EVENT_ATTR(any_access_wr, 0x112),
ARM_CSPMU_EVENT_ATTR(any_access_rd, 0x111),
ARM_CSPMU_EVENT_ATTR(any_byte_wr, 0x114),
ARM_CSPMU_EVENT_ATTR(any_byte_rd, 0x113),
ARM_CSPMU_EVENT_ATTR(any_outstanding_rd, 0x115),
ARM_CSPMU_EVENT_ATTR(local_dram_access_wr, 0x122),
ARM_CSPMU_EVENT_ATTR(local_dram_access_rd, 0x121),
ARM_CSPMU_EVENT_ATTR(local_dram_byte_wr, 0x124),
ARM_CSPMU_EVENT_ATTR(local_dram_byte_rd, 0x123),
ARM_CSPMU_EVENT_ATTR(mmio_access_wr, 0x132),
ARM_CSPMU_EVENT_ATTR(mmio_access_rd, 0x131),
ARM_CSPMU_EVENT_ATTR(mmio_byte_wr, 0x134),
ARM_CSPMU_EVENT_ATTR(mmio_byte_rd, 0x133),
ARM_CSPMU_EVENT_ATTR(mmio_outstanding_rd, 0x135),
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
NULL,
};
static struct attribute *display_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(rd_bytes_loc, 0x0),
ARM_CSPMU_EVENT_ATTR(rd_req_loc, 0x6),
ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc, 0xc),
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
NULL,
};
static struct attribute *ucf_gpu_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(rd_bytes_loc_rem, 0x0),
ARM_CSPMU_EVENT_ATTR(wr_bytes_loc, 0x2),
ARM_CSPMU_EVENT_ATTR(wr_bytes_rem, 0x3),
ARM_CSPMU_EVENT_ATTR(rd_req_loc_rem, 0x6),
ARM_CSPMU_EVENT_ATTR(wr_req_loc, 0x8),
ARM_CSPMU_EVENT_ATTR(wr_req_rem, 0x9),
ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc_rem, 0xc),
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
NULL,
};
static struct attribute *generic_pmu_event_attrs[] = {
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
NULL,
@@ -205,6 +288,54 @@ static struct attribute *cnvlink_pmu_format_attrs[] = {
NULL,
};
static struct attribute *ucf_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_ATTR(src_loc_noncpu, "config1:0"),
ARM_CSPMU_FORMAT_ATTR(src_loc_cpu, "config1:1"),
ARM_CSPMU_FORMAT_ATTR(src_rem, "config1:2"),
ARM_CSPMU_FORMAT_ATTR(dst_loc, "config1:3"),
ARM_CSPMU_FORMAT_ATTR(dst_rem, "config1:4"),
NULL,
};
static struct attribute *display_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
NULL,
};
static struct attribute *ucf_gpu_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
NULL,
};
static struct attribute *uphy_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_ATTR(pcie_rp_1, "config1:0"),
ARM_CSPMU_FORMAT_ATTR(pcie_rp_2, "config1:1"),
ARM_CSPMU_FORMAT_ATTR(pcie_rp_3, "config1:2"),
ARM_CSPMU_FORMAT_ATTR(pcie_rp_4, "config1:3"),
ARM_CSPMU_FORMAT_ATTR(pcie_rp_5, "config1:4"),
ARM_CSPMU_FORMAT_ATTR(xusb, "config1:5-10"),
ARM_CSPMU_FORMAT_ATTR(mgbe_0, "config1:11"),
ARM_CSPMU_FORMAT_ATTR(mgbe_1, "config1:12"),
ARM_CSPMU_FORMAT_ATTR(mgbe_2, "config1:13"),
ARM_CSPMU_FORMAT_ATTR(mgbe_3, "config1:14"),
ARM_CSPMU_FORMAT_ATTR(eqos, "config1:15"),
ARM_CSPMU_FORMAT_ATTR(ufs, "config1:16"),
NULL,
};
static struct attribute *vision_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_ATTR(vi_0, "config1:0-1"),
ARM_CSPMU_FORMAT_ATTR(vi_1, "config1:2-3"),
ARM_CSPMU_FORMAT_ATTR(isp_0, "config1:4-7"),
ARM_CSPMU_FORMAT_ATTR(isp_1, "config1:8-11"),
ARM_CSPMU_FORMAT_ATTR(vic, "config1:12-13"),
ARM_CSPMU_FORMAT_ATTR(pva, "config1:14-19"),
NULL,
};
static struct attribute *generic_pmu_format_attrs[] = {
ARM_CSPMU_FORMAT_EVENT_ATTR,
ARM_CSPMU_FORMAT_FILTER_ATTR,
@@ -246,6 +377,43 @@ static u32 nv_cspmu_event_filter(const struct perf_event *event)
return event->attr.config1 & ctx->filter_mask;
}
/*
* UCF leakage workaround:
* Disables PMCR and PMCNTEN for each counter before running a
* dummy experiment. This clears the internal state and prevents
* event leakage from the previous experiment. PMCNTEN is then
* re-enabled.
*/
static void ucf_pmu_stop_counters_leakage(struct arm_cspmu *cspmu)
{
int reg_id;
u32 cntenclr_offset = PMCNTENCLR;
u32 cntenset_offset = PMCNTENSET;
struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
/* Step 1: Disable PMCR.E */
writel(0, cspmu->base0 + PMCR);
/* Step 2: Clear PMCNTEN for all counters */
for (reg_id = 0; reg_id < cspmu->num_set_clr_reg; ++reg_id) {
ctx->pmcnten[reg_id] = readl(cspmu->base0 + cntenclr_offset);
writel(ctx->pmcnten[reg_id], cspmu->base0 + cntenclr_offset);
cntenclr_offset += sizeof(u32);
}
/* Step 3: Enable PMCR.E */
writel(PMCR_E, cspmu->base0 + PMCR);
/* Step 4: Disable PMCR.E */
writel(0, cspmu->base0 + PMCR);
/* Step 5: Re-enable PMCNTEN for the counters cleared in step 2 */
for (reg_id = 0; reg_id < cspmu->num_set_clr_reg; ++reg_id) {
writel(ctx->pmcnten[reg_id], cspmu->base0 + cntenset_offset);
cntenset_offset += sizeof(u32);
}
}
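/*
 * This hook is only wired up for PMUs whose nv_cspmu_match entry sets
 * .stop_counters (the UCF PMU below); nv_cspmu_init_ops() also allocates
 * the ctx->pmcnten save area that the loops above depend on.
 */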
enum nv_cspmu_name_fmt {
NAME_FMT_GENERIC,
NAME_FMT_SOCKET
@@ -260,6 +428,7 @@ struct nv_cspmu_match {
enum nv_cspmu_name_fmt name_fmt;
struct attribute **event_attr;
struct attribute **format_attr;
void (*stop_counters)(struct arm_cspmu *cspmu);
};
static const struct nv_cspmu_match nv_cspmu_match[] = {
@@ -313,6 +482,57 @@ static const struct nv_cspmu_match nv_cspmu_match[] = {
.event_attr = scf_pmu_event_attrs,
.format_attr = scf_pmu_format_attrs
},
{
.prodid = 0x2CF10000,
.prodid_mask = NV_PRODID_MASK,
.filter_mask = NV_UCF_FILTER_ID_MASK,
.filter_default_val = NV_UCF_FILTER_ID_MASK,
.name_pattern = "nvidia_ucf_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
.event_attr = ucf_pmu_event_attrs,
.format_attr = ucf_pmu_format_attrs,
.stop_counters = ucf_pmu_stop_counters_leakage
},
{
.prodid = 0x10800000,
.prodid_mask = NV_PRODID_MASK,
.filter_mask = NV_UPHY_FILTER_ID_MASK,
.filter_default_val = NV_UPHY_FILTER_ID_MASK,
.name_pattern = "nvidia_uphy_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
.event_attr = mcf_pmu_event_attrs,
.format_attr = uphy_pmu_format_attrs
},
{
.prodid = 0x10a00000,
.prodid_mask = NV_PRODID_MASK,
.filter_mask = 0,
.filter_default_val = NV_UCF_GPU_FILTER_ID_MASK,
.name_pattern = "nvidia_ucf_gpu_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
.event_attr = ucf_gpu_pmu_event_attrs,
.format_attr = ucf_gpu_pmu_format_attrs
},
{
.prodid = 0x10d00000,
.prodid_mask = NV_PRODID_MASK,
.filter_mask = 0,
.filter_default_val = NV_DISPLAY_FILTER_ID_MASK,
.name_pattern = "nvidia_display_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
.event_attr = display_pmu_event_attrs,
.format_attr = display_pmu_format_attrs
},
{
.prodid = 0x10e00000,
.prodid_mask = NV_PRODID_MASK,
.filter_mask = NV_VISION_FILTER_ID_MASK,
.filter_default_val = NV_VISION_FILTER_ID_MASK,
.name_pattern = "nvidia_vision_pmu_%u",
.name_fmt = NAME_FMT_SOCKET,
.event_attr = mcf_pmu_event_attrs,
.format_attr = vision_pmu_format_attrs
},
{
.prodid = 0,
.prodid_mask = 0,
@@ -389,6 +609,13 @@ static int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
impl_ops->get_event_attrs = nv_cspmu_get_event_attrs;
impl_ops->get_format_attrs = nv_cspmu_get_format_attrs;
impl_ops->get_name = nv_cspmu_get_name;
if (match->stop_counters != NULL) {
ctx->pmcnten = devm_kzalloc(dev, cspmu->num_set_clr_reg *
sizeof(u32), GFP_KERNEL);
if (!ctx->pmcnten)
return -ENOMEM;
impl_ops->stop_counters = match->stop_counters;
}
return 0;
}

View File

@@ -16,7 +16,7 @@ menuconfig TEGRA_PLATFORM_DEVICES
if TEGRA_PLATFORM_DEVICES
config TEGRA_EPL
bool "Tegra Error Propagation Layer Driver"
tristate "Tegra Error Propagation Layer Driver"
depends on MAILBOX
help
The tegra-epl driver provides interface for reporting software detected

View File

@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#include <linux/module.h>
#include <linux/fs.h>
@@ -14,7 +14,7 @@
#include <linux/pm.h>
/* Timeout in milliseconds */
#define TIMEOUT 5U
#define TIMEOUT 13U
/* 32bit data Length */
#define MAX_LEN 4
@@ -22,12 +22,19 @@
/* Macro indicating total number of Misc Sw generic errors in Misc EC */
#define NUM_SW_GENERIC_ERR 5U
/* Error index offset in mission status register */
#define ERROR_INDEX_OFFSET 24U
/* signature code for HSP pm notify data */
#define PM_STATE_UNI_CODE 0xFDEF
/* Timestamp validation constants */
#define TIMESTAMP_CNT_PERIOD 0x100000000ULL /* 32-bit SoC TSC counter period (2^32) */
/* This value is derived from the DOS FDTI (100ms) - EPL propagation delay (10ms) */
#define TIMESTAMP_VALID_RANGE 90000000ULL /* 90ms in nanoseconds */
/* Timestamp resolution constants (in nanoseconds) */
#define TEGRA234_TIMESTAMP_RESOLUTION_NS 32U
#define TEGRA264_TIMESTAMP_RESOLUTION_NS 1U
/* State Management */
#define EPS_DOS_INIT 0U
#define EPS_DOS_SUSPEND 3U
@@ -60,6 +67,12 @@ struct epl_misc_sw_err_cfg {
const char *dev_configured;
};
/* Error index offset in mission status register */
static uint32_t error_index_offset = 3;
/* Timestamp resolution for current SoC (in nanoseconds) */
static uint32_t timestamp_resolution_ns = TEGRA264_TIMESTAMP_RESOLUTION_NS;
static int device_file_major_number;
static const char device_name[] = "epdaemon";
@@ -80,6 +93,21 @@ static uint32_t handshake_retry_count;
static bool enable_deinit_notify;
/* Helper function to read the current SoC TSC timestamp (32-bit LSB) */
static inline uint32_t epl_get_current_timestamp(void)
{
uint64_t timestamp;
/* CNTVCT_EL0 is 64 bits wide; mrs needs a 64-bit operand, so read in full */
asm volatile("mrs %0, cntvct_el0" : "=r" (timestamp));
return (uint32_t)timestamp;
}
/* Helper function to convert SoC TSC timestamp ticks to nanoseconds */
static inline uint64_t epl_ticks_to_ns(uint64_t ticks)
{
return ticks * timestamp_resolution_ns;
}
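/*
 * Worked example for the constants above: the 90 ms validity window equals
 * TIMESTAMP_VALID_RANGE / timestamp_resolution_ns ticks, i.e. 2812500 ticks
 * on Tegra234 (32 ns/tick) and 90000000 ticks on Tegra264 (1 ns/tick).
 */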
static void tegra_hsp_tx_empty_notify(struct mbox_client *cl,
void *data, int empty_value)
{
@@ -113,21 +141,21 @@ static int tegra_hsp_mb_init(struct device *dev)
static ssize_t device_file_ioctl(
struct file *fp, unsigned int cmd, unsigned long arg)
{
uint32_t lData[MAX_LEN];
struct epl_error_report_frame error_frame;
int ret;
if (copy_from_user(lData, (void __user *)arg,
MAX_LEN * sizeof(uint32_t)))
/* Validate input parameters */
if (!arg)
return -EINVAL;
if (copy_from_user(&error_frame, (void __user *)arg,
sizeof(error_frame)))
return -EACCES;
switch (cmd) {
case EPL_REPORT_ERROR_CMD:
if (hs_state == HANDSHAKE_DONE)
ret = mbox_send_message(epl_hsp_v->tx.chan, (void *) lData);
else
ret = -ENODEV;
ret = epl_report_error(error_frame);
break;
default:
return -EINVAL;
@@ -148,12 +176,16 @@ int epl_get_misc_ec_err_status(struct device *dev, uint8_t err_number, bool *sta
if (miscerr_cfg[err_number].dev_configured == NULL || isAddrMappOk == false)
return -ENODEV;
/* Validate mission error status register mapping */
if (!mission_err_status_va)
return -ENODEV;
dev_str = dev_driver_string(dev);
if (strcmp(dev_str, miscerr_cfg[err_number].dev_configured) != 0)
return -EACCES;
mask = (1U << ((ERROR_INDEX_OFFSET + err_number) % 32U));
mask = (1U << ((error_index_offset + err_number) % 32U));
mission_err_status = readl(mission_err_status_va);
if ((mission_err_status & mask) != 0U)
@@ -182,6 +214,10 @@ int epl_report_misc_ec_error(struct device *dev, uint8_t err_number,
if (status == false)
return -EAGAIN;
/* Validate register mappings before writing */
if (!miscerr_cfg[err_number].err_code_va || !miscerr_cfg[err_number].err_assert_va)
return -ENODEV;
/* Updating error code */
writel(sw_error_code, miscerr_cfg[err_number].err_code_va);
@@ -195,9 +231,39 @@ EXPORT_SYMBOL_GPL(epl_report_misc_ec_error);
int epl_report_error(struct epl_error_report_frame error_report)
{
int ret = -EINVAL;
uint64_t current_timestamp_64;
uint64_t reported_timestamp_64;
uint64_t valid_range_ticks;
if (epl_hsp_v == NULL || hs_state != HANDSHAKE_DONE) {
/* Validate input parameters */
if (epl_hsp_v == NULL || hs_state != HANDSHAKE_DONE)
return -ENODEV;
/* Validate HSP channel */
if (!epl_hsp_v->tx.chan)
return -ENODEV;
/* Plausibility check for timestamp - only if timestamp is not zero */
if (error_report.timestamp != 0) {
/* Get current timestamp (32-bit LSB) and convert to 64-bit for calculations */
current_timestamp_64 = (uint64_t)epl_get_current_timestamp();
reported_timestamp_64 = (uint64_t)error_report.timestamp;
/* Check for timestamp overflow */
/* If current timestamp is less than reported timestamp, assume overflow occurred */
if (current_timestamp_64 < reported_timestamp_64)
current_timestamp_64 += TIMESTAMP_CNT_PERIOD;
/* Validate timestamp range - reject if difference is more than ~90ms */
/* Convert 90ms to counter ticks based on current resolution */
valid_range_ticks = TIMESTAMP_VALID_RANGE / timestamp_resolution_ns;
if ((current_timestamp_64 - reported_timestamp_64) > valid_range_ticks) {
dev_warn(&epl_hsp_v->dev, "epl_report_error: Invalid timestamp - difference %llu ticks (%llu ns) exceeds valid range (%llu ticks)\n",
current_timestamp_64 - reported_timestamp_64,
epl_ticks_to_ns(current_timestamp_64 - reported_timestamp_64),
valid_range_ticks);
return -EINVAL;
}
}
ret = mbox_send_message(epl_hsp_v->tx.chan, (void *)&error_report);
@@ -211,12 +277,16 @@ static int epl_client_fsi_pm_notify(u32 state)
int ret;
u32 pdata[4];
/* Validate state parameter */
if (state > EPS_DOS_UNKNOWN)
return -EINVAL;
pdata[0] = PM_STATE_UNI_CODE;
pdata[1] = state;
pdata[2] = state;
pdata[3] = PM_STATE_UNI_CODE;
if (hs_state == HANDSHAKE_DONE)
if (hs_state == HANDSHAKE_DONE && epl_hsp_v && epl_hsp_v->tx.chan)
ret = mbox_send_message(epl_hsp_v->tx.chan, (void *) pdata);
else
ret = -ENODEV;
@@ -228,7 +298,7 @@ static int epl_client_fsi_handshake(void *arg)
{
uint8_t count = 0;
if (epl_hsp_v) {
if (epl_hsp_v && epl_hsp_v->tx.chan) {
int ret;
const uint32_t handshake_data[] = {0x45504C48, 0x414E4453, 0x48414B45,
0x44415441};
@@ -244,12 +314,15 @@ static int epl_client_fsi_handshake(void *arg)
break;
}
} while (count < handshake_retry_count);
} else {
hs_state = HANDSHAKE_FAILED;
dev_warn(&pdev_local->dev, "epl_client: handshake failed - no valid HSP channel\n");
}
if (hs_state == HANDSHAKE_FAILED)
pr_warn("epl_client: handshake with FSI failed\n");
dev_warn(&pdev_local->dev, "epl_client: handshake with FSI failed\n");
else
pr_info("epl_client: handshake done with FSI, try %u\n", count);
dev_info(&pdev_local->dev, "epl_client: handshake done with FSI, try %u\n", count);
return 0;
}
@@ -257,10 +330,14 @@ static int epl_client_fsi_handshake(void *arg)
static int __maybe_unused epl_client_suspend(struct device *dev)
{
int ret = 0;
pr_debug("tegra-epl: suspend called\n");
if (enable_deinit_notify)
dev_dbg(dev, "tegra-epl: suspend called\n");
if (enable_deinit_notify) {
ret = epl_client_fsi_pm_notify(EPS_DOS_SUSPEND);
if (ret < 0)
dev_warn(dev, "tegra-epl: suspend notification failed: %d\n", ret);
}
hs_state = HANDSHAKE_PENDING;
return ret;
@@ -268,15 +345,32 @@ static int __maybe_unused epl_client_suspend(struct device *dev)
static int __maybe_unused epl_client_resume(struct device *dev)
{
pr_debug("tegra-epl: resume called\n");
int ret;
(void)epl_client_fsi_handshake(NULL);
return epl_client_fsi_pm_notify(EPS_DOS_RESUME);
dev_dbg(dev, "tegra-epl: resume called\n");
ret = epl_client_fsi_handshake(NULL);
if (ret < 0) {
dev_warn(dev, "tegra-epl: handshake failed during resume: %d\n", ret);
return ret;
}
/* Only send PM notify if handshake was successful */
if (hs_state == HANDSHAKE_DONE) {
ret = epl_client_fsi_pm_notify(EPS_DOS_RESUME);
if (ret < 0)
dev_warn(dev, "tegra-epl: resume notification failed: %d\n", ret);
} else {
dev_warn(dev, "tegra-epl: skipping resume notification - handshake not successful\n");
}
return ret;
}
static SIMPLE_DEV_PM_OPS(epl_client_pm, epl_client_suspend, epl_client_resume);
static const struct of_device_id epl_client_dt_match[] = {
{ .compatible = "nvidia,tegra234-epl-client"},
{ .compatible = "nvidia,tegra264-epl-client"},
{}
};
@@ -299,6 +393,7 @@ static int epl_register_device(void)
return result;
}
device_file_major_number = result;
dev_class = class_create(device_name);
if (dev_class == NULL) {
pr_err("%s> Could not create class for device\n", device_name);
@@ -333,18 +428,30 @@ static int epl_client_probe(struct platform_device *pdev)
const struct device_node *np = dev->of_node;
int iterator = 0;
char name[32] = "client-misc-sw-generic-err";
bool is_misc_ec_mapped = false;
hs_state = HANDSHAKE_PENDING;
epl_register_device();
ret = epl_register_device();
if (ret < 0) {
dev_err(dev, "Failed to register device: %d\n", ret);
return ret;
}
ret = tegra_hsp_mb_init(dev);
if (ret < 0) {
dev_err(dev, "Failed to initialize HSP mailbox: %d\n", ret);
epl_unregister_device();
return ret;
}
pdev_local = pdev;
for (iterator = 0; iterator < NUM_SW_GENERIC_ERR; iterator++) {
name[26] = (char)(iterator+48U);
name[27] = '\0';
if (of_property_read_string(np, name, &miscerr_cfg[iterator].dev_configured) == 0) {
pr_info("Misc Sw Generic Err #%d configured to client %s\n",
dev_info(dev, "Misc Sw Generic Err #%d configured to client %s\n",
iterator, miscerr_cfg[iterator].dev_configured);
/* Mapping registers to process address space */
@@ -359,9 +466,12 @@ static int epl_client_probe(struct platform_device *pdev)
ret = -1;
dev_err(&pdev->dev, "error in mapping misc err register for err #%d\n",
iterator);
} else {
is_misc_ec_mapped = true;
}
} else {
pr_info("Misc Sw Generic Err %d not configured for any client\n", iterator);
dev_info(dev, "Misc Sw Generic Err %d not configured for any client\n",
iterator);
}
}
@@ -374,16 +484,41 @@ static int epl_client_probe(struct platform_device *pdev)
dev_info(dev, "handshake-retry-count %u\n", handshake_retry_count);
mission_err_status_va = devm_platform_ioremap_resource(pdev, NUM_SW_GENERIC_ERR * 2);
if (IS_ERR(mission_err_status_va)) {
isAddrMappOk = false;
dev_err(&pdev->dev, "error in mapping mission error status register\n");
return PTR_ERR(mission_err_status_va);
if (of_device_is_compatible(np, "nvidia,tegra234-epl-client")) {
error_index_offset = 24;
timestamp_resolution_ns = TEGRA234_TIMESTAMP_RESOLUTION_NS;
} else if (of_device_is_compatible(np, "nvidia,tegra264-epl-client")) {
error_index_offset = 3;
timestamp_resolution_ns = TEGRA264_TIMESTAMP_RESOLUTION_NS;
} else {
dev_err(dev, "tegra-epl: valid dt compatible string not found\n");
ret = -1;
}
if (is_misc_ec_mapped == true) {
mission_err_status_va = devm_platform_ioremap_resource(pdev, NUM_SW_GENERIC_ERR * 2);
if (IS_ERR(mission_err_status_va)) {
isAddrMappOk = false;
dev_err(&pdev->dev, "error in mapping mission error status register\n");
return PTR_ERR(mission_err_status_va);
}
}
if (ret == 0) {
(void) epl_client_fsi_handshake(NULL);
return epl_client_fsi_pm_notify(EPS_DOS_INIT);
ret = epl_client_fsi_handshake(NULL);
if (ret < 0) {
dev_warn(dev, "tegra-epl: handshake failed during probe: %d\n", ret);
return ret;
}
/* Only send PM notify if handshake was successful */
if (hs_state == HANDSHAKE_DONE) {
ret = epl_client_fsi_pm_notify(EPS_DOS_INIT);
if (ret < 0)
dev_warn(dev, "tegra-epl: init notification failed: %d\n", ret);
} else {
dev_warn(dev, "tegra-epl: skipping init notification - handshake not successful\n");
}
}
return ret;
@@ -391,11 +526,15 @@ static int epl_client_probe(struct platform_device *pdev)
static void epl_client_shutdown(struct platform_device *pdev)
{
pr_debug("tegra-epl: shutdown called\n");
int ret;
if (enable_deinit_notify)
if (epl_client_fsi_pm_notify(EPS_DOS_DEINIT) < 0)
pr_err("Unable to send notification to fsi\n");
dev_dbg(&pdev->dev, "tegra-epl: shutdown called\n");
if (enable_deinit_notify) {
ret = epl_client_fsi_pm_notify(EPS_DOS_DEINIT);
if (ret < 0)
dev_err(&pdev->dev, "Unable to send notification to fsi: %d\n", ret);
}
hs_state = HANDSHAKE_PENDING;

View File

@@ -77,6 +77,7 @@
#include <linux/clk.h>
#include <linux/err.h>
#include <linux/io.h>
#include <linux/math64.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/pm_opp.h>
@@ -147,7 +148,7 @@ static inline void pwm_writel_mask32(struct tegra_pwm_chip *pc,
}
static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
int duty_ns, int period_ns)
u64 duty_ns, u64 period_ns)
{
struct tegra_pwm_chip *pc = to_tegra_pwm_chip(chip);
unsigned long channel_o = pc->soc->channel_offset;
@@ -157,6 +158,7 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
unsigned long scale_s = pc->soc->scale_shift;
unsigned long required_clk_rate;
u32 pwm_f, pfm_f;
u64 val;
int err;
/*
@@ -170,8 +172,11 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
* per (1 + pc->pwm_depth) cycles and make sure to round to the
* nearest integer during division.
*/
pwm_f = (u32)DIV_ROUND_CLOSEST_ULL(duty_ns * (1 + pc->pwm_depth),
period_ns);
val = mul_u64_u64_div_u64(duty_ns, 1 + pc->pwm_depth, period_ns);
if (val > U32_MAX)
return -EINVAL;
pwm_f = (u32)val;
/* Avoid overflow on 100% duty cycle */
if (pwm_f == 1 + pc->pwm_depth)
@@ -182,8 +187,11 @@ static int tegra_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
* required_clk_rate is a reference rate for source clock and
* it is derived based on user requested period.
*/
required_clk_rate = DIV_ROUND_UP_ULL(
(u64)NSEC_PER_SEC * (1 + pc->pwm_depth), period_ns);
val = mul_u64_u64_div_u64(NSEC_PER_SEC, 1 + pc->pwm_depth, period_ns);
if (val > U32_MAX)
return -EINVAL;
required_clk_rate = (u32)val;
pc->clk_rate = clk_get_rate(pc->clk);
if (pc->clk_rate < required_clk_rate)
return -EINVAL;
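/*
 * Design note: mul_u64_u64_div_u64() keeps the a * b intermediate in 128
 * bits, so the u64 duty_ns/period_ns taken by the new prototype cannot
 * overflow the multiplication the way the old 64-bit expressions could;
 * the explicit U32_MAX checks then bound the final programmed values.
 */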

View File

@@ -1491,6 +1491,10 @@ static int __maybe_unused tegra234_cbb_resume_noirq(struct device *dev)
{
struct tegra234_cbb *cbb = dev_get_drvdata(dev);
/* set ERD bit to mask SError and generate interrupt to report error */
if (cbb->fabric->off_mask_erd)
tegra234_cbb_mask_serror(cbb);
tegra234_cbb_error_enable(&cbb->base);
dev_dbg(dev, "%s resumed\n", cbb->fabric->fab_list[cbb->fabric->fab_id].name);

View File

@@ -186,6 +186,29 @@
#define TEGRA_SMC_PMC_READ 0xaa
#define TEGRA_SMC_PMC_WRITE 0xbb
/* Bit field definitions for scratch registers */
#define BR_FAIL_BITMAP_MASK 0xF
#define BR_FAIL_BITMAP_SHIFT 0
#define BR_ACTIVE_CHAIN_MASK 0x3
#define BR_ACTIVE_CHAIN_SHIFT 4
#define BOOT_CHAIN_STATUS_A_MASK 0x1
#define BOOT_CHAIN_STATUS_A_SHIFT 7
#define BOOT_CHAIN_STATUS_B_MASK 0x1
#define BOOT_CHAIN_STATUS_B_SHIFT 8
#define BOOT_CHAIN_CURRENT_MASK 0x3
#define BOOT_CHAIN_CURRENT_SHIFT 11
#define LAST_BOOT_CHAIN_FAILED_MASK 0x1
#define LAST_BOOT_CHAIN_FAILED_SHIFT 13
#define ROOTFS_SR_MAGIC_MASK 0xFFFF
#define ROOTFS_SR_MAGIC_SHIFT 0
#define ROOTFS_CURRENT_MASK 0x3
#define ROOTFS_CURRENT_SHIFT 16
#define ROOTFS_RETRY_COUNT_B_MASK 0x3
#define ROOTFS_RETRY_COUNT_B_SHIFT 18
#define ROOTFS_RETRY_COUNT_A_MASK 0x3
#define ROOTFS_RETRY_COUNT_A_SHIFT 20
struct pmc_clk {
struct clk_hw hw;
unsigned long offs;
@@ -2095,6 +2118,156 @@ void tegra_pmc_reset_sysfs_remove(struct tegra_pmc *pmc)
device_remove_file(dev, &dev_attr_reset_level);
}
/* Helper macros for scratch sysfs attributes */
#define TEGRA_PMC_SCRATCH_ATTR_RW(name, reg_field, mask, shift, min_val, max_val) \
static ssize_t name##_show(struct device *dev, struct device_attribute *attr, char *buf) \
{ \
struct tegra_pmc *pmc = dev_get_drvdata(dev); \
u32 value; \
if (!pmc->scratch || !pmc->soc->regs->reg_field) \
return -ENODEV; \
value = tegra_pmc_scratch_readl(pmc, pmc->soc->regs->reg_field); \
value = (value >> shift) & mask; \
return sprintf(buf, "%u\n", value); \
} \
static ssize_t name##_store(struct device *dev, struct device_attribute *attr, \
const char *buf, size_t count) \
{ \
struct tegra_pmc *pmc = dev_get_drvdata(dev); \
unsigned long value; \
u32 reg_val; \
int ret; \
if (!pmc->scratch || !pmc->soc->regs->reg_field) \
return -ENODEV; \
ret = kstrtoul(buf, 0, &value); \
if (ret) \
return ret; \
if (value < min_val || value > max_val) \
return -EINVAL; \
reg_val = tegra_pmc_scratch_readl(pmc, pmc->soc->regs->reg_field); \
reg_val &= ~(mask << shift); \
reg_val |= (value & mask) << shift; \
tegra_pmc_scratch_writel(pmc, reg_val, pmc->soc->regs->reg_field); \
return count; \
} \
static DEVICE_ATTR_RW(name)
#define TEGRA_PMC_SCRATCH_ATTR_RO(name, reg_field, mask, shift) \
static ssize_t name##_show(struct device *dev, struct device_attribute *attr, char *buf) \
{ \
struct tegra_pmc *pmc = dev_get_drvdata(dev); \
u32 value; \
if (!pmc->scratch || !pmc->soc->regs->reg_field) \
return -ENODEV; \
value = tegra_pmc_scratch_readl(pmc, pmc->soc->regs->reg_field); \
value = (value >> shift) & mask; \
return sprintf(buf, "%u\n", value); \
} \
static DEVICE_ATTR_RO(name)
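/*
 * Expansion example for reference: the br_fail_bitmap instance below emits
 * br_fail_bitmap_show()/br_fail_bitmap_store() and dev_attr_br_fail_bitmap,
 * exposing bits 3:0 of the SCRATCH_L0_1_0 word as a sysfs file whose writes
 * are range-checked to 0..0xF.
 */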
/* Scratch register sysfs attributes */
TEGRA_PMC_SCRATCH_ATTR_RW(br_fail_bitmap, scratch_l0_1_0,
BR_FAIL_BITMAP_MASK, BR_FAIL_BITMAP_SHIFT, 0, 0xF);
TEGRA_PMC_SCRATCH_ATTR_RW(br_active_chain, scratch_l0_1_0,
BR_ACTIVE_CHAIN_MASK, BR_ACTIVE_CHAIN_SHIFT, 0, 3);
TEGRA_PMC_SCRATCH_ATTR_RW(boot_chain_status_a, scratch_l0_1_0,
BOOT_CHAIN_STATUS_A_MASK, BOOT_CHAIN_STATUS_A_SHIFT, 0, 1);
TEGRA_PMC_SCRATCH_ATTR_RW(boot_chain_status_b, scratch_l0_1_0,
BOOT_CHAIN_STATUS_B_MASK, BOOT_CHAIN_STATUS_B_SHIFT, 0, 1);
TEGRA_PMC_SCRATCH_ATTR_RO(boot_chain_current, scratch_l0_1_0,
BOOT_CHAIN_CURRENT_MASK, BOOT_CHAIN_CURRENT_SHIFT);
TEGRA_PMC_SCRATCH_ATTR_RO(last_boot_chain_failed, scratch_l0_1_0,
LAST_BOOT_CHAIN_FAILED_MASK, LAST_BOOT_CHAIN_FAILED_SHIFT);
TEGRA_PMC_SCRATCH_ATTR_RW(rootfs_sr_magic, scratch_l0_21_0,
ROOTFS_SR_MAGIC_MASK, ROOTFS_SR_MAGIC_SHIFT, 0, 0xFFFF);
TEGRA_PMC_SCRATCH_ATTR_RW(rootfs_current, scratch_l0_21_0,
ROOTFS_CURRENT_MASK, ROOTFS_CURRENT_SHIFT, 0, 1);
TEGRA_PMC_SCRATCH_ATTR_RW(rootfs_retry_count_b, scratch_l0_21_0,
ROOTFS_RETRY_COUNT_B_MASK, ROOTFS_RETRY_COUNT_B_SHIFT, 0, 3);
TEGRA_PMC_SCRATCH_ATTR_RW(rootfs_retry_count_a, scratch_l0_21_0,
ROOTFS_RETRY_COUNT_A_MASK, ROOTFS_RETRY_COUNT_A_SHIFT, 0, 3);
void tegra_pmc_scratch_sysfs_init(struct tegra_pmc *pmc)
{
struct device *dev = pmc->dev;
int err;
if (!pmc->scratch) {
dev_warn(dev, "scratch registers not available, skipping sysfs init\n");
return;
}
/* Only create attributes if the register fields are defined */
if (pmc->soc->regs->scratch_l0_1_0) {
err = device_create_file(dev, &dev_attr_br_fail_bitmap);
if (err)
dev_warn(dev, "failed to create br_fail_bitmap sysfs: %d\n", err);
err = device_create_file(dev, &dev_attr_br_active_chain);
if (err)
dev_warn(dev, "failed to create br_active_chain sysfs: %d\n", err);
err = device_create_file(dev, &dev_attr_boot_chain_status_a);
if (err)
dev_warn(dev, "failed to create boot_chain_status_a sysfs: %d\n", err);
err = device_create_file(dev, &dev_attr_boot_chain_status_b);
if (err)
dev_warn(dev, "failed to create boot_chain_status_b sysfs: %d\n", err);
err = device_create_file(dev, &dev_attr_boot_chain_current);
if (err)
dev_warn(dev, "failed to create boot_chain_current sysfs: %d\n", err);
err = device_create_file(dev, &dev_attr_last_boot_chain_failed);
if (err)
dev_warn(dev, "failed to create last_boot_chain_failed sysfs: %d\n", err);
}
if (pmc->soc->regs->scratch_l0_21_0) {
err = device_create_file(dev, &dev_attr_rootfs_sr_magic);
if (err)
dev_warn(dev, "failed to create rootfs_sr_magic sysfs: %d\n", err);
err = device_create_file(dev, &dev_attr_rootfs_current);
if (err)
dev_warn(dev, "failed to create rootfs_current sysfs: %d\n", err);
err = device_create_file(dev, &dev_attr_rootfs_retry_count_b);
if (err)
dev_warn(dev, "failed to create rootfs_retry_count_b sysfs: %d\n", err);
err = device_create_file(dev, &dev_attr_rootfs_retry_count_a);
if (err)
dev_warn(dev, "failed to create rootfs_retry_count_a sysfs: %d\n", err);
}
}
void tegra_pmc_scratch_sysfs_remove(struct tegra_pmc *pmc)
{
struct device *dev = pmc->dev;
if (!pmc->scratch)
return;
if (pmc->soc->regs->scratch_l0_1_0) {
device_remove_file(dev, &dev_attr_br_fail_bitmap);
device_remove_file(dev, &dev_attr_br_active_chain);
device_remove_file(dev, &dev_attr_boot_chain_status_a);
device_remove_file(dev, &dev_attr_boot_chain_status_b);
device_remove_file(dev, &dev_attr_boot_chain_current);
device_remove_file(dev, &dev_attr_last_boot_chain_failed);
}
if (pmc->soc->regs->scratch_l0_21_0) {
device_remove_file(dev, &dev_attr_rootfs_sr_magic);
device_remove_file(dev, &dev_attr_rootfs_current);
device_remove_file(dev, &dev_attr_rootfs_retry_count_b);
device_remove_file(dev, &dev_attr_rootfs_retry_count_a);
}
}
static int tegra_pmc_irq_translate(struct irq_domain *domain,
struct irq_fwspec *fwspec,
unsigned long *hwirq,

View File

@@ -30,7 +30,6 @@
#include <dt-bindings/pinctrl/pinctrl-tegra-io-pad.h>
#include <dt-bindings/gpio/tegra264-gpio.h>
static int tegra_pmc_reboot_notify(struct notifier_block *this,
unsigned long action, void *data)
{
@@ -117,6 +116,7 @@ static int tegra_pmc_probe(struct platform_device *pdev)
pmc->soc = of_device_get_match_data(&pdev->dev);
pmc->dev = &pdev->dev;
mutex_init(&pmc->powergates_lock);
/* take over the memory region from the early initialization */
pmc->base = devm_platform_ioremap_resource(pdev, 0);
@@ -160,15 +160,13 @@ static int tegra_pmc_probe(struct platform_device *pdev)
return err;
}
tegra_pmc_reset_sysfs_init(pmc);
err = tegra_pmc_pinctrl_init(pmc);
if (err)
goto cleanup_sysfs;
return err;
err = tegra_pmc_irq_init(pmc);
if (err < 0)
goto cleanup_sysfs;
return err;
/* Some wakes require specific filter configuration */
if (pmc->soc->set_wake_filters)
@@ -176,12 +174,18 @@ static int tegra_pmc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, pmc);
tegra_pmc_scratch_sysfs_init(pmc);
return 0;
}
cleanup_sysfs:
tegra_pmc_reset_sysfs_remove(pmc);
static int tegra_pmc_remove(struct platform_device *pdev)
{
struct tegra_pmc *pmc = platform_get_drvdata(pdev);
return err;
tegra_pmc_scratch_sysfs_remove(pmc);
return 0;
}
static int __maybe_unused tegra_pmc_resume(struct device *dev)
@@ -204,10 +208,6 @@ static const struct dev_pm_ops tegra_pmc_pm_ops = {
SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(tegra_pmc_suspend, tegra_pmc_resume)
};
static const char * const tegra264_reset_levels[] = {
"L0", "L1", "L2", "WARM"
};
#define TEGRA264_IO_PAD(_id, _dpd, _request, _status, _has_int_reg, _e_reg06, _e_reg18, _voltage, _e_33v_ctl, _name) \
((struct tegra_io_pad_soc) { \
.id = (_id), \
@@ -262,11 +262,8 @@ static const struct pinctrl_pin_desc tegra264_pin_descs[] = {
static const struct tegra_pmc_regs tegra264_pmc_regs = {
.scratch0 = 0x684,
.rst_status = 0x4,
.rst_source_shift = 0x2,
.rst_source_mask = 0x1fc,
.rst_level_shift = 0x0,
.rst_level_mask = 0x3,
.scratch_l0_1_0 = 0x67C,
.scratch_l0_21_0 = 0x6cc,
.aowake_cntrl = 0x0,
.aowake_mask_w = 0x200,
.aowake_status_w = 0x410,
@@ -278,102 +275,12 @@ static const struct tegra_pmc_regs tegra264_pmc_regs = {
.aowake_ctrl = 0x68c,
};
static const char * const tegra264_reset_sources[] = {
"SYS_RESET_N", /* 0 */
"CSDC_RTC_XTAL",
"VREFRO_POWER_BAD",
"SCPM_SOC_XTAL",
"SCPM_RTC_XTAL",
"FMON_32K",
"FMON_OSC",
"POD_RTC",
"POD_IO",
"POD_PLUS_IO_SPLL",
"POD_PLUS_SOC", /* 10 */
"VMON_PLUS_UV",
"VMON_PLUS_OV",
"FUSECRC_FAULT",
"OSC_FAULT",
"BPMP_BOOT_FAULT",
"SCPM_BPMP_CORE_CLK",
"SCPM_PSC_SE_CLK",
"VMON_SOC_MIN",
"VMON_SOC_MAX",
"VMON_MSS_MIN", /* 20 */
"VMON_MSS_MAX",
"POD_PLUS_IO_U4_TSENSE",
"SOC_THERM_FAULT",
"FSI_THERM_FAULT",
"PSC_TURTLE_MODE",
"SCPM_OESP_SE_CLK",
"SCPM_SB_SE_CLK",
"POD_CPU",
"POD_GPU",
"DCLS_GPU", /* 30 */
"POD_MSS",
"FSI_FMON",
"VMON_FSI_MIN",
"VMON_FSI_MAX",
"VMON_CPU_MIN",
"VMON_CPU_MAX",
"NVJTAG_SEL_MONITOR",
"BPMP_FMON",
"AO_WDT_POR",
"BPMP_WDT_POR", /* 40 */
"AO_TKE_WDT_POR",
"RCE0_WDT_POR",
"RCE1_WDT_POR",
"DCE_WDT_POR",
"PVA_0_WDT_POR",
"FSI_R5_WDT_POR",
"FSI_R52_0_WDT_POR",
"FSI_R52_1_WDT_POR",
"FSI_R52_2_WDT_POR",
"FSI_R52_3_WDT_POR", /* 50 */
"TOP_0_WDT_POR",
"TOP_1_WDT_POR",
"TOP_2_WDT_POR",
"APE_C0_WDT_POR",
"APE_C1_WDT_POR",
"GPU_TKE_WDT_POR",
"OESP_WDT_POR",
"SB_WDT_POR",
"PSC_WDT_POR",
"SW_MAIN", /* 60 */
"L0L1_RST_OUT_N",
"FSI_HSM",
"CSITE_SW",
"AO_WDT_DBG",
"BPMP_WDT_DBG",
"AO_TKE_WDT_DBG",
"RCE0_WDT_DBG",
"RCE1_WDT_DBG",
"DCE_WDT_DBG",
"PVA_0_WDT_DBG", /* 70 */
"FSI_R5_WDT_DBG",
"FSI_R52_0_WDT_DBG",
"FSI_R52_1_WDT_DBG",
"FSI_R52_2_WDT_DBG",
"FSI_R52_3_WDT_DBG",
"TOP_0_WDT_DBG",
"TOP_1_WDT_DBG",
"TOP_2_WDT_DBG",
"APE_C0_WDT_DBG",
"APE_C1_WDT_DBG", /* 80 */
"SB_WDT_DBG",
"OESP_WDT_DBG",
"PSC_WDT_DBG",
"TSC_0_WDT_DBG",
"TSC_1_WDT_DBG",
"L2_RST_OUT_N",
"SC7", /* 87 */
};
static const struct tegra_wake_event tegra264_wake_events[] = {
TEGRA_WAKE_IRQ("pmu", 0, 727),
TEGRA_WAKE_IRQ("rtc", 65, 548),
TEGRA_WAKE_IRQ("usb3_port_0", 79, 965),
TEGRA_WAKE_IRQ("usb3_port_1", 80, 965),
TEGRA_WAKE_IRQ("usb3_port_2", 81, 965),
TEGRA_WAKE_IRQ("usb3_port_3", 82, 965),
TEGRA_WAKE_IRQ("usb2_port_0", 83, 965),
TEGRA_WAKE_IRQ("usb2_port_1", 84, 965),
@@ -403,10 +310,10 @@ static const struct tegra_pmc_soc tegra264_pmc_soc = {
.set_wake_filters = tegra186_pmc_set_wake_filters,
.irq_set_wake = tegra186_pmc_irq_set_wake,
.irq_set_type = tegra186_pmc_irq_set_type,
.reset_sources = tegra264_reset_sources,
.num_reset_sources = ARRAY_SIZE(tegra264_reset_sources),
.reset_levels = tegra264_reset_levels,
.num_reset_levels = ARRAY_SIZE(tegra264_reset_levels),
.reset_sources = NULL,
.num_reset_sources = 0,
.reset_levels = NULL,
.num_reset_levels = 0,
.num_wake_events = ARRAY_SIZE(tegra264_wake_events),
.wake_events = tegra264_wake_events,
.max_wake_events = 128,
@@ -430,6 +337,7 @@ static struct platform_driver tegra_pmc_driver = {
.pm = &tegra_pmc_pm_ops,
},
.probe = tegra_pmc_probe,
.remove = tegra_pmc_remove,
};
builtin_platform_driver(tegra_pmc_driver);

View File

@@ -2,7 +2,7 @@
/*
* NVIDIA Tegra xHCI host controller driver
*
* SPDX-FileCopyrightText: Copyright (c) 2014-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2014-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (C) 2014 Google, Inc.
*/
@@ -181,6 +181,8 @@ enum build_info_log {
LOG_MEMORY
};
#define WAKE_IRQ_START_INDEX 2
struct tegra_xusb_fw_header {
__le32 boot_loadaddr_in_imem;
__le32 boot_codedfi_offset;
@@ -257,7 +259,7 @@ struct tegra_xusb_soc {
unsigned int num_supplies;
const struct tegra_xusb_phy_type *phy_types;
unsigned int num_types;
unsigned int num_wakes;
unsigned int max_num_wakes;
const struct tegra_xusb_context_soc *context;
struct {
@@ -380,6 +382,7 @@ struct tegra_xusb {
atomic_t hub_ctrl_use_cnt;
struct notifier_block padctl_nb;
int num_wakes;
};
static struct hc_driver __read_mostly tegra_xhci_hc_driver;
@@ -2168,33 +2171,42 @@ static int tegra_xhci_padctl_notify(struct notifier_block *nb,
static int tegra_xusb_setup_wakeup(struct platform_device *pdev, struct tegra_xusb *tegra)
{
int i;
unsigned int i;
if (device_property_read_bool(tegra->dev, "disable-wake"))
if (tegra->soc->max_num_wakes == 0)
return 0;
tegra->wake_irqs = devm_kcalloc(tegra->dev,
tegra->soc->num_wakes,
sizeof(*tegra->wake_irqs), GFP_KERNEL);
tegra->soc->max_num_wakes,
sizeof(*tegra->wake_irqs), GFP_KERNEL);
if (!tegra->wake_irqs)
return -ENOMEM;
for (i = 0; i < tegra->soc->num_wakes; i++) {
char irq_name[] = "wakeX";
/*
* USB wake events are independent of each other, so it is not necessary for a platform
* to utilize all wake-up events supported for a given device. The USB host can operate
* even if wake-up events are not defined or fail to be configured. Therefore, we only
* return critical errors, such as -ENOMEM.
*/
for (i = 0; i < tegra->soc->max_num_wakes; i++) {
struct irq_data *data;
snprintf(irq_name, sizeof(irq_name), "wake%d", i);
tegra->wake_irqs[i] = platform_get_irq_byname(pdev, irq_name);
tegra->wake_irqs[i] = platform_get_irq_optional(pdev, i + WAKE_IRQ_START_INDEX);
if (tegra->wake_irqs[i] < 0)
continue;
break;
data = irq_get_irq_data(tegra->wake_irqs[i]);
if (!data) {
irq_dispose_mapping(tegra->wake_irqs[i]);
tegra->wake_irqs[i] = -ENXIO;
continue;
dev_warn(tegra->dev, "failed to get irq data for wake event %d\n", i);
break;
}
irq_set_irq_type(tegra->wake_irqs[i], irqd_get_trigger_type(data));
}
tegra->num_wakes = i;
dev_dbg(tegra->dev, "setup %d wake events\n", tegra->num_wakes);
return 0;
}
@@ -2253,16 +2265,14 @@ static int tegra_xusb_probe(struct platform_device *pdev)
return tegra->mbox_irq;
}
err = tegra_xusb_setup_wakeup(pdev, tegra);
if (err)
return err;
tegra->padctl = tegra_xusb_padctl_get(&pdev->dev);
if (IS_ERR(tegra->padctl))
return PTR_ERR(tegra->padctl);
if (tegra->soc->num_wakes && !tegra->soc->is_xhci_vf) {
err = tegra_xusb_setup_wakeup(pdev, tegra);
if (err)
goto put_padctl;
}
np = of_parse_phandle(pdev->dev.of_node, "nvidia,xusb-padctl", 0);
if (!np) {
err = -ENODEV;
@@ -2628,7 +2638,6 @@ static void tegra_xusb_remove(struct platform_device *pdev)
{
struct tegra_xusb *tegra = platform_get_drvdata(pdev);
struct xhci_hcd *xhci = hcd_to_xhci(tegra->hcd);
unsigned int i;
if (tegra->soc->is_xhci_vf)
tegra_xusb_padctl_event_unregister(tegra->padctl, &tegra->padctl_nb);
@@ -2651,10 +2660,6 @@ static void tegra_xusb_remove(struct platform_device *pdev)
if (tegra->padctl_irq)
pm_runtime_disable(&pdev->dev);
for (i = 0; i < tegra->soc->num_wakes && tegra->wake_irqs; i++)
if (tegra->wake_irqs[i] >= 0)
irq_dispose_mapping(tegra->wake_irqs[i]);
pm_runtime_put(&pdev->dev);
tegra_xusb_disable(tegra);
@@ -3130,9 +3135,9 @@ out:
if (enable_irq_wake(tegra->padctl_irq))
dev_err(dev, "failed to enable padctl wakes\n");
for (i = 0; i < tegra->soc->num_wakes && tegra->wake_irqs; i++)
if (tegra->wake_irqs[i] >= 0)
enable_irq_wake(tegra->wake_irqs[i]);
for (i = 0; i < tegra->num_wakes; i++)
enable_irq_wake(tegra->wake_irqs[i]);
}
}
@@ -3164,9 +3169,9 @@ static __maybe_unused int tegra_xusb_resume(struct device *dev)
if (disable_irq_wake(tegra->padctl_irq))
dev_err(dev, "failed to disable padctl wakes\n");
for (i = 0; i < tegra->soc->num_wakes && tegra->wake_irqs; i++)
if (tegra->wake_irqs[i] >= 0)
disable_irq_wake(tegra->wake_irqs[i]);
for (i = 0; i < tegra->num_wakes; i++)
disable_irq_wake(tegra->wake_irqs[i]);
}
tegra->suspended = false;
mutex_unlock(&tegra->lock);
@@ -3422,7 +3427,7 @@ static const struct tegra_xusb_soc tegra234_soc = {
.num_supplies = ARRAY_SIZE(tegra194_supply_names),
.phy_types = tegra194_phy_types,
.num_types = ARRAY_SIZE(tegra194_phy_types),
.num_wakes = 7,
.max_num_wakes = 7,
.context = &tegra186_xusb_context,
.ports = {
.usb3 = { .offset = 0, .count = 4, },
@@ -3497,7 +3502,7 @@ static const struct tegra_xusb_soc tegra264_soc = {
.num_supplies = ARRAY_SIZE(tegra194_supply_names),
.phy_types = tegra194_phy_types,
.num_types = ARRAY_SIZE(tegra194_phy_types),
.num_wakes = 8,
.max_num_wakes = 8,
.context = &tegra186_xusb_context,
.ports = {
.usb3 = { .offset = 0, .count = 4, },

View File

@@ -25,7 +25,7 @@ struct epl_error_report_frame {
uint16_t reporter_id;
};
#ifdef CONFIG_TEGRA_EPL
#if IS_ENABLED(CONFIG_TEGRA_EPL)
/**
* @brief API to check if SW error can be reported via Misc EC
* by reading and checking Misc EC error status register value.

View File

@@ -167,6 +167,8 @@ struct tegra_io_pad_soc {
struct tegra_pmc_regs {
unsigned int scratch0;
unsigned int scratch_l0_1_0;
unsigned int scratch_l0_21_0;
unsigned int rst_status;
unsigned int rst_source_shift;
unsigned int rst_source_mask;
@@ -402,6 +404,9 @@ int tegra186_pmc_suspend(struct tegra_pmc *pmc);
void tegra_pmc_reset_sysfs_init(struct tegra_pmc *pmc);
void tegra_pmc_reset_sysfs_remove(struct tegra_pmc *pmc);
void tegra_pmc_scratch_sysfs_init(struct tegra_pmc *pmc);
void tegra_pmc_scratch_sysfs_remove(struct tegra_pmc *pmc);
int tegra_pmc_pinctrl_init(struct tegra_pmc *pmc);
int tegra_pmc_init(struct tegra_pmc *pmc);

View File

@@ -1437,6 +1437,15 @@ void run_posix_cpu_timers(void)
lockdep_assert_irqs_disabled();
/*
* Ensure that release_task(tsk) can't happen while
* handle_posix_cpu_timers() is running. Otherwise, a concurrent
* posix_cpu_timer_del() may fail to lock_task_sighand(tsk) and
* miss timer->it.cpu.firing != 0.
*/
if (tsk->exit_state)
return;
/*
* If the actual expiry is deferred to task work context and the
* work is already scheduled there is no point to do anything here.