NVIDIA: SAUCE: iommu/io-pgtable-arm: Support contiguous bit in translation tables

The contiguous bit in translation table entries can be used as a hint to
the SMMU that a group of adjacent translation table entries has
consistent attributes and points to a contiguous and properly aligned
output address range. This enables the SMMU to predict the properties of
the remaining translation table entries in the same group without
accessing them. It also allows an SMMU implementation to make more
efficient use of its TLB by using a single TLB entry to cover all
translation table entries in the same group.

In the case of 4KB granule size, there are 16 translation table entries
in one group.

This change sets the contiguous bit for such groups of entries that are
completely covered by a single call to map_pages. As it stands, the code
wouldn't set the contiguous bit if a group of adjacent descriptors is
completed by separate calls to map_pages.

Signed-off-by: Daniel Mentz <danielmentz@google.com>
Link: https://lore.kernel.org/linux-iommu/20250430231924.1481493-1-danielmentz@google.com/

Bug 5342740

[aritger: For the backport: updated the __arm_lpae_init_pte() callsite in
arm_lpae_split_blk_unmap() (arm_lpae_split_blk_unmap() is no longer
present at top of tree where the original commit was written).]
Signed-off-by: Andy Ritger <aritger@nvidia.com>
Signed-off-by: Terje Bergstrom <tbergstrom@nvidia.com>
Signed-off-by: Lakshmanan Selvi Muthusamy <lm@nvidia.com>
Change-Id: I83b6572f1cda72837591eca37a1a0b267e46973b
Reviewed-on: https://git-master.nvidia.com/r/c/3rdparty/canonical/linux-noble/+/3413138
GVS: buildbot_gerritrpt <buildbot_gerritrpt@nvidia.com>
Reviewed-by: Brad Griffis <bgriffis@nvidia.com>
This commit is contained in:
Daniel Mentz
2025-04-30 23:19:24 +00:00
committed by mobile promotions
parent bb7a3f15e8
commit 49467e4376
+50 -5
View File
@@ -75,6 +75,7 @@
#define ARM_LPAE_PTE_NSTABLE (((arm_lpae_iopte)1) << 63)
#define ARM_LPAE_PTE_XN (((arm_lpae_iopte)3) << 53)
#define ARM_LPAE_PTE_CONT (((arm_lpae_iopte)1) << 52)
#define ARM_LPAE_PTE_AF (((arm_lpae_iopte)1) << 10)
#define ARM_LPAE_PTE_SH_NS (((arm_lpae_iopte)0) << 8)
#define ARM_LPAE_PTE_SH_OS (((arm_lpae_iopte)2) << 8)
@@ -271,6 +272,27 @@ static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep, int num_entries,
sizeof(*ptep) * num_entries, DMA_TO_DEVICE);
}
/*
 * Number of adjacent entries at level @lvl that form one contiguous-hint
 * group for the granule configured in @data.
 *
 * Returns 1 for any granule/level combination not listed below.
 */
static int arm_lpae_cont_ptes(int lvl, struct arm_lpae_io_pgtable *data)
{
	switch (ARM_LPAE_GRANULE(data)) {
	case SZ_4K:
		/* 4K granule: groups of 16 entries from level 1 downwards. */
		return (lvl >= 1) ? 16 : 1;
	case SZ_16K:
		/* 16K granule: the group size differs between levels 2 and 3. */
		if (lvl == 3)
			return 128;
		return (lvl == 2) ? 32 : 1;
	case SZ_64K:
		/* 64K granule: groups of 32 entries from level 2 downwards. */
		return (lvl >= 2) ? 32 : 1;
	default:
		return 1;
	}
}
static void __arm_lpae_clear_pte(arm_lpae_iopte *ptep, struct io_pgtable_cfg *cfg, int num_entries)
{
for (int i = 0; i < num_entries; i++)
@@ -285,8 +307,30 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
unsigned long iova, size_t size, size_t pgcount,
int lvl, arm_lpae_iopte *ptep);
/*
 * Decide whether entry @i of a mapping of @num_entries entries, starting at
 * @iova / @paddr on level @lvl, may carry the contiguous hint.
 *
 * The hint is only allowed when:
 *  - the level has a contiguous group size larger than one entry,
 *  - @iova and @paddr share the same offset within a contiguous-group-sized
 *    region, and
 *  - the whole group that contains the entry lies inside the range of table
 *    indices written by this mapping.
 *
 * Returns true if the entry may be marked contiguous, false otherwise.
 */
static bool arm_lpae_use_contpte(struct arm_lpae_io_pgtable *data,
				 unsigned long iova, phys_addr_t paddr,
				 int lvl, int num_entries, int i)
{
	size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data);
	int cont_ptes = arm_lpae_cont_ptes(lvl, data);
	int contmask = cont_ptes - 1;
	unsigned long long contpte_addr_mask;
	int map_idx_start, tbl_idx;

	/*
	 * A group size of 1 means there is no contiguous hint at this level.
	 * Without this check contmask is 0 and both index tests below are
	 * trivially true, so every entry would be marked contiguous.
	 */
	if (cont_ptes <= 1)
		return false;

	/*
	 * The span of one group can exceed 32 bits (e.g. 16 entries of 1 GiB
	 * each), so the mask must be computed in a 64-bit type. An int mask
	 * would be truncated and then sign-extended to all ones in the
	 * comparison below.
	 */
	contpte_addr_mask = (unsigned long long)sz * cont_ptes - 1;

	if ((paddr & contpte_addr_mask) != (iova & contpte_addr_mask))
		return false;

	map_idx_start = ARM_LPAE_LVL_IDX(iova, lvl, data);
	tbl_idx = map_idx_start + i;

	/*
	 * First test: the group containing tbl_idx starts at or after
	 * map_idx_start. Second test: tbl_idx lies before the last group
	 * boundary that is not beyond the end of the mapping, i.e. the group
	 * is fully covered.
	 */
	if (((tbl_idx & contmask) <= i) &&
	    (tbl_idx < ((map_idx_start + num_entries) & ~contmask)))
		return true;

	return false;
}
static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
phys_addr_t paddr, arm_lpae_iopte prot,
unsigned long iova, phys_addr_t paddr, arm_lpae_iopte prot,
int lvl, int num_entries, arm_lpae_iopte *ptep)
{
arm_lpae_iopte pte = prot;
@@ -300,8 +344,9 @@ static void __arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
pte |= ARM_LPAE_PTE_TYPE_BLOCK;
for (i = 0; i < num_entries; i++)
ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data);
ptep[i] = pte | paddr_to_iopte(paddr + i * sz, data) |
(arm_lpae_use_contpte(data, iova, paddr, lvl, num_entries, i) ?
ARM_LPAE_PTE_CONT : 0);
if (!cfg->coherent_walk)
__arm_lpae_sync_pte(ptep, num_entries, cfg);
}
@@ -334,7 +379,7 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data,
}
}
__arm_lpae_init_pte(data, paddr, prot, lvl, num_entries, ptep);
__arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
return 0;
}
@@ -599,7 +644,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data,
if (i >= unmap_idx_start && i < (unmap_idx_start + num_entries))
continue;
__arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]);
__arm_lpae_init_pte(data, blk_paddr, pte, iova + i * split_sz, lvl, 1, &tablep[i]);
}
pte = arm_lpae_install_table(tablep, ptep, blk_pte, data);