Revert "page_pool: Track DMA-mapped pages and unmap them when destroying the pool"

This reverts commit 4f51fb0d25 which is
commit ee62ce7a1d909ccba0399680a03c2dee83bcae95 upstream.

It breaks the Android kernel abi and can be brought back in the future
in an abi-safe way if it is really needed.

Bug: 161946584
Change-Id: I1778b757059df295816de520022729e7ab7348a1
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Author: Greg Kroah-Hartman
Date:   2025-07-04 11:47:48 +00:00
parent e5ead1ec40
commit 7a5c39c156

5 changed files with 18 additions and 147 deletions


@@ -4307,51 +4307,13 @@ void prep_new_hpage(struct page *page, gfp_t gfp_flags, unsigned int alloc_flags
 void prep_compound_page(struct page *page, unsigned int order);
 #endif
 
-/*
- * DMA mapping IDs for page_pool
- *
- * When DMA-mapping a page, page_pool allocates an ID (from an xarray) and
- * stashes it in the upper bits of page->pp_magic. We always want to be able to
- * unambiguously identify page pool pages (using page_pool_page_is_pp()). Non-PP
- * pages can have arbitrary kernel pointers stored in the same field as pp_magic
- * (since it overlaps with page->lru.next), so we must ensure that we cannot
- * mistake a valid kernel pointer with any of the values we write into this
- * field.
- *
- * On architectures that set POISON_POINTER_DELTA, this is already ensured,
- * since this value becomes part of PP_SIGNATURE; meaning we can just use the
- * space between the PP_SIGNATURE value (without POISON_POINTER_DELTA), and the
- * lowest bits of POISON_POINTER_DELTA. On arches where POISON_POINTER_DELTA is
- * 0, we make sure that we leave the two topmost bits empty, as that guarantees
- * we won't mistake a valid kernel pointer for a value we set, regardless of the
- * VMSPLIT setting.
- *
- * Altogether, this means that the number of bits available is constrained by
- * the size of an unsigned long (at the upper end, subtracting two bits per the
- * above), and the definition of PP_SIGNATURE (with or without
- * POISON_POINTER_DELTA).
- */
-#define PP_DMA_INDEX_SHIFT (1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA))
-#if POISON_POINTER_DELTA > 0
-/* PP_SIGNATURE includes POISON_POINTER_DELTA, so limit the size of the DMA
- * index to not overlap with that if set
- */
-#define PP_DMA_INDEX_BITS MIN(32, __ffs(POISON_POINTER_DELTA) - PP_DMA_INDEX_SHIFT)
-#else
-/* Always leave out the topmost two; see above. */
-#define PP_DMA_INDEX_BITS MIN(32, BITS_PER_LONG - PP_DMA_INDEX_SHIFT - 2)
-#endif
-#define PP_DMA_INDEX_MASK GENMASK(PP_DMA_INDEX_BITS + PP_DMA_INDEX_SHIFT - 1, \
-				   PP_DMA_INDEX_SHIFT)
-
 /* Mask used for checking in page_pool_page_is_pp() below. page->pp_magic is
  * OR'ed with PP_SIGNATURE after the allocation in order to preserve bit 0 for
- * the head page of compound page and bit 1 for pfmemalloc page, as well as the
- * bits used for the DMA index. page_is_pfmemalloc() is checked in
- * __page_pool_put_page() to avoid recycling the pfmemalloc page.
+ * the head page of compound page and bit 1 for pfmemalloc page.
+ * page_is_pfmemalloc() is checked in __page_pool_put_page() to avoid recycling
+ * the pfmemalloc page.
  */
-#define PP_MAGIC_MASK ~(PP_DMA_INDEX_MASK | 0x3UL)
+#define PP_MAGIC_MASK ~0x3UL
 
 #ifdef CONFIG_PAGE_POOL
 static inline bool page_pool_page_is_pp(struct page *page)
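Note on the layout being removed above: the reverted comment reserves the bits between PP_SIGNATURE and the poison-pointer bits of pp_magic for a DMA index. A minimal user-space sketch, assuming x86_64-style poison values (fls_ul/ffs_ul below are hypothetical stand-ins for the kernel's __fls/__ffs), shows how the shift, width and mask would come out:

/*
 * Hypothetical user-space sketch of the reverted bit layout; not kernel code.
 * POISON_PTR_DELTA assumes the x86_64 ILLEGAL_POINTER_VALUE.
 */
#include <stdio.h>

#define POISON_PTR_DELTA	0xdead000000000000UL	/* assumption for illustration */
#define PP_SIG			(0x40UL + POISON_PTR_DELTA)

static unsigned int fls_ul(unsigned long v)	/* index of highest set bit */
{
	unsigned int n = 0;

	while (v >>= 1)
		n++;
	return n;
}

static unsigned int ffs_ul(unsigned long v)	/* index of lowest set bit */
{
	unsigned int n = 0;

	while (!(v & 1)) {
		v >>= 1;
		n++;
	}
	return n;
}

int main(void)
{
	/* 1 + __fls(PP_SIGNATURE - POISON_POINTER_DELTA): first free bit above the signature */
	unsigned int shift = 1 + fls_ul(PP_SIG - POISON_PTR_DELTA);
	/* MIN(32, __ffs(POISON_POINTER_DELTA) - shift): stop below the poison bits */
	unsigned int avail = ffs_ul(POISON_PTR_DELTA) - shift;
	unsigned int bits  = avail > 32 ? 32 : avail;
	/* GENMASK(bits + shift - 1, shift) */
	unsigned long mask = ((1UL << bits) - 1) << shift;

	printf("shift=%u bits=%u mask=%#lx\n", shift, bits, mask);
	return 0;
}

With those assumed values this prints shift=7 bits=32 mask=0x7fffffff80, i.e. a 32-bit index occupying bits 7-38 of pp_magic, clear of both the signature and the poison bits.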


@@ -70,10 +70,6 @@
 #define KEY_DESTROY		0xbd
 
 /********** net/core/page_pool.c **********/
-/*
- * page_pool uses additional free bits within this value to store data, see the
- * definition of PP_DMA_INDEX_MASK in mm.h
- */
 #define PP_SIGNATURE		(0x40 + POISON_POINTER_DELTA)
 
 /********** net/core/skbuff.c **********/


@@ -6,7 +6,6 @@
 #include <linux/dma-direction.h>
 #include <linux/ptr_ring.h>
 #include <linux/types.h>
-#include <linux/xarray.h>
 #include <net/netmem.h>
 #include <linux/android_kabi.h>
@@ -35,9 +34,6 @@
 #define PP_FLAG_ALL		(PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | \
 				 PP_FLAG_SYSTEM_POOL | PP_FLAG_ALLOW_UNREADABLE_NETMEM)
 
-/* Index limit to stay within PP_DMA_INDEX_BITS for DMA indices */
-#define PP_DMA_INDEX_LIMIT	XA_LIMIT(1, BIT(PP_DMA_INDEX_BITS) - 1)
-
 /*
  * Fast allocation side cache array/stack
  *
@@ -221,8 +217,6 @@ struct page_pool {
 	void *mp_priv;
-	struct xarray dma_mapped;
 
 #ifdef CONFIG_PAGE_POOL_STATS
 	/* recycle stats are per-cpu to avoid locking */
 	struct page_pool_recycle_stats __percpu *recycle_stats;


@@ -5,7 +5,7 @@
 static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
 {
-	return __netmem_clear_lsb(netmem)->pp_magic & ~PP_DMA_INDEX_MASK;
+	return __netmem_clear_lsb(netmem)->pp_magic;
 }
 
 static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
@@ -15,8 +15,6 @@ static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
 static inline void netmem_clear_pp_magic(netmem_ref netmem)
 {
-	WARN_ON_ONCE(__netmem_clear_lsb(netmem)->pp_magic & PP_DMA_INDEX_MASK);
-
 	__netmem_clear_lsb(netmem)->pp_magic = 0;
 }
@@ -35,28 +33,4 @@ static inline void netmem_set_dma_addr(netmem_ref netmem,
 {
 	__netmem_clear_lsb(netmem)->dma_addr = dma_addr;
 }
-
-static inline unsigned long netmem_get_dma_index(netmem_ref netmem)
-{
-	unsigned long magic;
-
-	if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
-		return 0;
-
-	magic = __netmem_clear_lsb(netmem)->pp_magic;
-
-	return (magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT;
-}
-
-static inline void netmem_set_dma_index(netmem_ref netmem,
-					unsigned long id)
-{
-	unsigned long magic;
-
-	if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
-		return;
-
-	magic = netmem_get_pp_magic(netmem) | (id << PP_DMA_INDEX_SHIFT);
-	__netmem_clear_lsb(netmem)->pp_magic = magic;
-}
 #endif
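The two helpers deleted above pack the xarray ID into the spare pp_magic bits and mask it back out. A rough user-space round-trip illustration, reusing the assumed constants from the earlier sketch (not kernel code):

/* Illustration only: constants assumed as in the sketch after the mm.h hunk. */
#include <assert.h>

#define PP_SIG			0xdead000000000040UL	/* assumed 0x40 + poison delta */
#define PP_DMA_INDEX_SHIFT	7
#define PP_DMA_INDEX_MASK	0x0000007fffffff80UL

int main(void)
{
	unsigned long pp_magic = PP_SIG;	/* set when the page joins the pool */
	unsigned long id = 42;			/* ID handed out by the tracking allocator */

	/* like netmem_set_dma_index(): OR the ID into the spare bits */
	pp_magic |= id << PP_DMA_INDEX_SHIFT;

	/* like netmem_get_dma_index(): mask the ID back out */
	assert(((pp_magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT) == id);

	/* like netmem_get_pp_magic(): the signature is still intact underneath */
	assert((pp_magic & ~PP_DMA_INDEX_MASK) == PP_SIG);
	return 0;
}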


@@ -272,7 +272,8 @@ static int page_pool_init(struct page_pool *pool,
 	/* Driver calling page_pool_create() also call page_pool_destroy() */
 	refcount_set(&pool->user_cnt, 1);
 
-	xa_init_flags(&pool->dma_mapped, XA_FLAGS_ALLOC1);
+	if (pool->dma_map)
+		get_device(pool->p.dev);
 
 	if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) {
 		/* We rely on rtnl_lock()ing to make sure netdev_rx_queue
@@ -310,7 +311,9 @@ free_ptr_ring:
 static void page_pool_uninit(struct page_pool *pool)
 {
 	ptr_ring_cleanup(&pool->ring, NULL);
-	xa_destroy(&pool->dma_mapped);
+
+	if (pool->dma_map)
+		put_device(pool->p.dev);
 
 #ifdef CONFIG_PAGE_POOL_STATS
 	if (!pool->system)
@@ -451,21 +454,13 @@ page_pool_dma_sync_for_device(const struct page_pool *pool,
 			      netmem_ref netmem,
 			      u32 dma_sync_size)
 {
-	if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) {
-		rcu_read_lock();
-		/* re-check under rcu_read_lock() to sync with page_pool_scrub() */
-		if (pool->dma_sync)
-			__page_pool_dma_sync_for_device(pool, netmem,
-							dma_sync_size);
-		rcu_read_unlock();
-	}
+	if (pool->dma_sync && dma_dev_need_sync(pool->p.dev))
+		__page_pool_dma_sync_for_device(pool, netmem, dma_sync_size);
 }
 
-static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t gfp)
+static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
 {
 	dma_addr_t dma;
-	int err;
-	u32 id;
 
 	/* Setup DMA mapping: use 'struct page' area for storing DMA-addr
 	 * since dma_addr_t can be either 32 or 64 bits and does not always fit
@@ -479,30 +474,15 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem, gfp_t g
 	if (dma_mapping_error(pool->p.dev, dma))
 		return false;
 
-	if (page_pool_set_dma_addr_netmem(netmem, dma)) {
-		WARN_ONCE(1, "unexpected DMA address, please report to netdev@");
+	if (page_pool_set_dma_addr_netmem(netmem, dma))
 		goto unmap_failed;
-	}
-
-	if (in_softirq())
-		err = xa_alloc(&pool->dma_mapped, &id, netmem_to_page(netmem),
-			       PP_DMA_INDEX_LIMIT, gfp);
-	else
-		err = xa_alloc_bh(&pool->dma_mapped, &id, netmem_to_page(netmem),
-				  PP_DMA_INDEX_LIMIT, gfp);
-	if (err) {
-		WARN_ONCE(err != -ENOMEM, "couldn't track DMA mapping, please report to netdev@");
-		goto unset_failed;
-	}
-
-	netmem_set_dma_index(netmem, id);
 	page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len);
 
 	return true;
 
-unset_failed:
-	page_pool_set_dma_addr_netmem(netmem, 0);
 unmap_failed:
+	WARN_ONCE(1, "unexpected DMA address, please report to netdev@");
 	dma_unmap_page_attrs(pool->p.dev, dma,
 			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
 			     DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
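For context on the xa_alloc()/xa_cmpxchg() calls removed in this and the later hunks: the reverted code records every DMA-mapped page under a small ID so the pool can find and unmap it when it is destroyed. A compact user-space model of that track-on-map / verify-on-unmap pattern, with a plain array standing in for the kernel xarray (all names and sizes here are illustrative assumptions):

/*
 * Illustrative user-space model, not kernel code: allocate an ID when a page
 * is "mapped", remember the page under that ID, and only "unmap" if the ID
 * still refers to the same page.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define TRACK_SLOTS 64				/* stand-in for PP_DMA_INDEX_LIMIT */

struct fake_page { int nr; };

static struct fake_page *tracked[TRACK_SLOTS];	/* slot 0 reserved, like XA_LIMIT(1, ...) */

/* "map": record the page and hand back its ID (0 means tracking failed) */
static size_t track_mapping(struct fake_page *page)
{
	for (size_t id = 1; id < TRACK_SLOTS; id++) {
		if (!tracked[id]) {
			tracked[id] = page;
			return id;
		}
	}
	return 0;
}

/* "unmap": only proceed if this ID still maps to this page, then forget it */
static bool untrack_mapping(struct fake_page *page, size_t id)
{
	if (!id || tracked[id] != page)
		return false;
	tracked[id] = NULL;
	return true;
}

int main(void)
{
	struct fake_page p = { .nr = 1 };
	size_t id = track_mapping(&p);

	printf("tracked as id %zu, untrack ok: %d\n", id, untrack_mapping(&p, id));
	return 0;
}

The compare-and-clear step mirrors why the reverted kernel code used xa_cmpxchg() on release: a page must only be unmapped once, even if the pool teardown path and a late page return race to release it.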
@@ -519,7 +499,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
 	if (unlikely(!page))
 		return NULL;
 
-	if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page), gfp))) {
+	if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) {
 		put_page(page);
 		return NULL;
 	}
@@ -566,7 +546,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool,
 	 */
 	for (i = 0; i < nr_pages; i++) {
 		netmem = pool->alloc.cache[i];
-		if (dma_map && unlikely(!page_pool_dma_map(pool, netmem, gfp))) {
+		if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) {
 			put_page(netmem_to_page(netmem));
 			continue;
 		}
@@ -668,8 +648,6 @@ void page_pool_clear_pp_info(netmem_ref netmem)
 static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
 							  netmem_ref netmem)
 {
-	struct page *old, *page = netmem_to_page(netmem);
-	unsigned long id;
 	dma_addr_t dma;
 
 	if (!pool->dma_map)
@@ -678,17 +656,6 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
 		 */
 		return;
 
-	id = netmem_get_dma_index(netmem);
-	if (!id)
-		return;
-
-	if (in_softirq())
-		old = xa_cmpxchg(&pool->dma_mapped, id, page, NULL, 0);
-	else
-		old = xa_cmpxchg_bh(&pool->dma_mapped, id, page, NULL, 0);
-	if (old != page)
-		return;
-
 	dma = page_pool_get_dma_addr_netmem(netmem);
 
 	/* When page is unmapped, it cannot be returned to our pool */
@@ -696,7 +663,6 @@ static __always_inline void __page_pool_release_page_dma(struct page_pool *pool,
 			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
 			     DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
 	page_pool_set_dma_addr_netmem(netmem, 0);
-	netmem_set_dma_index(netmem, 0);
 }
 
 /* Disconnects a page (from a page_pool). API users can have a need
@@ -1071,29 +1037,8 @@ static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
 static void page_pool_scrub(struct page_pool *pool)
 {
-	unsigned long id;
-	void *ptr;
-
 	page_pool_empty_alloc_cache_once(pool);
-	if (!pool->destroy_cnt++ && pool->dma_map) {
-		if (pool->dma_sync) {
-			/* Disable page_pool_dma_sync_for_device() */
-			pool->dma_sync = false;
-
-			/* Make sure all concurrent returns that may see the old
-			 * value of dma_sync (and thus perform a sync) have
-			 * finished before doing the unmapping below. Skip the
-			 * wait if the device doesn't actually need syncing, or
-			 * if there are no outstanding mapped pages.
-			 */
-			if (dma_dev_need_sync(pool->p.dev) &&
-			    !xa_empty(&pool->dma_mapped))
-				synchronize_net();
-		}
-
-		xa_for_each(&pool->dma_mapped, id, ptr)
-			__page_pool_release_page_dma(pool, page_to_netmem(ptr));
-	}
+	pool->destroy_cnt++;
 
 	/* No more consumers should exist, but producers could still
 	 * be in-flight.