BACKPORT: mm: page_alloc: group fallback functions together
The way the fallback rules are spread out makes them hard to follow. Move the functions next to each other at least.

Link: https://lkml.kernel.org/r/20250225001023.1494422-4-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Brendan Jackman <jackmanb@google.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Bug: 420836317
(cherry picked from commit a4138a2702a4428317ecdb115934554df4b788b4)
[ 1. In find_suitable_fallback(), MIGRATE_PCPTYPES from the original patch is replaced with MIGRATE_FALLBACKS.
  2. The hook functions in reserve_highatomic_pageblock() and unreserve_highatomic_pageblock() are kept. ]
Change-Id: I069e8dd7f8b009c686daef4459f9f1452b3f4c2c
Signed-off-by: yipeng xiang <yipengxiang@honor.corp-partner.google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
fc396aefad
commit
9407ed7ee3
+207
-206
@@ -2076,6 +2076,43 @@ static void change_pageblock_range(struct page *pageblock_page,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool boost_watermark(struct zone *zone)
|
||||||
|
{
|
||||||
|
unsigned long max_boost;
|
||||||
|
|
||||||
|
if (!watermark_boost_factor)
|
||||||
|
return false;
|
||||||
|
/*
|
||||||
|
* Don't bother in zones that are unlikely to produce results.
|
||||||
|
* On small machines, including kdump capture kernels running
|
||||||
|
* in a small area, boosting the watermark can cause an out of
|
||||||
|
* memory situation immediately.
|
||||||
|
*/
|
||||||
|
if ((pageblock_nr_pages * 4) > zone_managed_pages(zone))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
|
||||||
|
watermark_boost_factor, 10000);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* high watermark may be uninitialised if fragmentation occurs
|
||||||
|
* very early in boot so do not boost. We do not fall
|
||||||
|
* through and boost by pageblock_nr_pages as failing
|
||||||
|
* allocations that early means that reclaim is not going
|
||||||
|
* to help and it may even be impossible to reclaim the
|
||||||
|
* boosted watermark resulting in a hang.
|
||||||
|
*/
|
||||||
|
if (!max_boost)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
max_boost = max(pageblock_nr_pages, max_boost);
|
||||||
|
|
||||||
|
zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages,
|
||||||
|
max_boost);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* When we are falling back to another migratetype during allocation, try to
|
* When we are falling back to another migratetype during allocation, try to
|
||||||
* steal extra free pages from the same pageblocks to satisfy further
|
* steal extra free pages from the same pageblocks to satisfy further
|
||||||
@@ -2117,41 +2154,38 @@ static bool can_steal_fallback(unsigned int order, int start_mt)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool boost_watermark(struct zone *zone)
|
/*
|
||||||
|
* Check whether there is a suitable fallback freepage with requested order.
|
||||||
|
* If only_stealable is true, this function returns fallback_mt only if
|
||||||
|
* we can steal other freepages all together. This would help to reduce
|
||||||
|
* fragmentation due to mixed migratetype pages in one pageblock.
|
||||||
|
*/
|
||||||
|
int find_suitable_fallback(struct free_area *area, unsigned int order,
|
||||||
|
int migratetype, bool only_stealable, bool *can_steal)
|
||||||
{
|
{
|
||||||
unsigned long max_boost;
|
int i;
|
||||||
|
int fallback_mt;
|
||||||
|
|
||||||
if (!watermark_boost_factor)
|
if (area->nr_free == 0)
|
||||||
return false;
|
return -1;
|
||||||
/*
|
|
||||||
* Don't bother in zones that are unlikely to produce results.
|
|
||||||
* On small machines, including kdump capture kernels running
|
|
||||||
* in a small area, boosting the watermark can cause an out of
|
|
||||||
* memory situation immediately.
|
|
||||||
*/
|
|
||||||
if ((pageblock_nr_pages * 4) > zone_managed_pages(zone))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
max_boost = mult_frac(zone->_watermark[WMARK_HIGH],
|
*can_steal = false;
|
||||||
watermark_boost_factor, 10000);
|
for (i = 0; i < MIGRATE_FALLBACKS - 1 ; i++) {
|
||||||
|
fallback_mt = fallbacks[migratetype][i];
|
||||||
|
if (free_area_empty(area, fallback_mt))
|
||||||
|
continue;
|
||||||
|
|
||||||
/*
|
if (can_steal_fallback(order, migratetype))
|
||||||
* high watermark may be uninitialised if fragmentation occurs
|
*can_steal = true;
|
||||||
* very early in boot so do not boost. We do not fall
|
|
||||||
* through and boost by pageblock_nr_pages as failing
|
|
||||||
* allocations that early means that reclaim is not going
|
|
||||||
* to help and it may even be impossible to reclaim the
|
|
||||||
* boosted watermark resulting in a hang.
|
|
||||||
*/
|
|
||||||
if (!max_boost)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
max_boost = max(pageblock_nr_pages, max_boost);
|
if (!only_stealable)
|
||||||
|
return fallback_mt;
|
||||||
|
|
||||||
zone->watermark_boost = min(zone->watermark_boost + pageblock_nr_pages,
|
if (*can_steal)
|
||||||
max_boost);
|
return fallback_mt;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -2227,184 +2261,6 @@ try_to_steal_block(struct zone *zone, struct page *page,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Check whether there is a suitable fallback freepage with requested order.
|
|
||||||
* If only_stealable is true, this function returns fallback_mt only if
|
|
||||||
* we can steal other freepages all together. This would help to reduce
|
|
||||||
* fragmentation due to mixed migratetype pages in one pageblock.
|
|
||||||
*/
|
|
||||||
int find_suitable_fallback(struct free_area *area, unsigned int order,
|
|
||||||
int migratetype, bool only_stealable, bool *can_steal)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
int fallback_mt;
|
|
||||||
|
|
||||||
if (area->nr_free == 0)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
*can_steal = false;
|
|
||||||
for (i = 0; i < MIGRATE_FALLBACKS - 1 ; i++) {
|
|
||||||
fallback_mt = fallbacks[migratetype][i];
|
|
||||||
if (free_area_empty(area, fallback_mt))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (can_steal_fallback(order, migratetype))
|
|
||||||
*can_steal = true;
|
|
||||||
|
|
||||||
if (!only_stealable)
|
|
||||||
return fallback_mt;
|
|
||||||
|
|
||||||
if (*can_steal)
|
|
||||||
return fallback_mt;
|
|
||||||
}
|
|
||||||
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Reserve the pageblock(s) surrounding an allocation request for
|
|
||||||
* exclusive use of high-order atomic allocations if there are no
|
|
||||||
* empty page blocks that contain a page with a suitable order
|
|
||||||
*/
|
|
||||||
static void reserve_highatomic_pageblock(struct page *page, int order,
|
|
||||||
struct zone *zone)
|
|
||||||
{
|
|
||||||
int mt;
|
|
||||||
unsigned long max_managed, flags;
|
|
||||||
bool bypass = false;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* The number reserved as: minimum is 1 pageblock, maximum is
|
|
||||||
* roughly 1% of a zone. But if 1% of a zone falls below a
|
|
||||||
* pageblock size, then don't reserve any pageblocks.
|
|
||||||
* Check is race-prone but harmless.
|
|
||||||
*/
|
|
||||||
if ((zone_managed_pages(zone) / 100) < pageblock_nr_pages)
|
|
||||||
return;
|
|
||||||
max_managed = ALIGN((zone_managed_pages(zone) / 100), pageblock_nr_pages);
|
|
||||||
if (zone->nr_reserved_highatomic >= max_managed)
|
|
||||||
return;
|
|
||||||
trace_android_vh_reserve_highatomic_bypass(page, &bypass);
|
|
||||||
if (bypass)
|
|
||||||
return;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&zone->lock, flags);
|
|
||||||
|
|
||||||
/* Recheck the nr_reserved_highatomic limit under the lock */
|
|
||||||
if (zone->nr_reserved_highatomic >= max_managed)
|
|
||||||
goto out_unlock;
|
|
||||||
|
|
||||||
/* Yoink! */
|
|
||||||
mt = get_pageblock_migratetype(page);
|
|
||||||
/* Only reserve normal pageblocks (i.e., they can merge with others) */
|
|
||||||
if (!migratetype_is_mergeable(mt))
|
|
||||||
goto out_unlock;
|
|
||||||
|
|
||||||
if (order < pageblock_order) {
|
|
||||||
if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1)
|
|
||||||
goto out_unlock;
|
|
||||||
zone->nr_reserved_highatomic += pageblock_nr_pages;
|
|
||||||
} else {
|
|
||||||
change_pageblock_range(page, order, MIGRATE_HIGHATOMIC);
|
|
||||||
zone->nr_reserved_highatomic += 1 << order;
|
|
||||||
}
|
|
||||||
|
|
||||||
out_unlock:
|
|
||||||
spin_unlock_irqrestore(&zone->lock, flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Used when an allocation is about to fail under memory pressure. This
|
|
||||||
* potentially hurts the reliability of high-order allocations when under
|
|
||||||
* intense memory pressure but failed atomic allocations should be easier
|
|
||||||
* to recover from than an OOM.
|
|
||||||
*
|
|
||||||
* If @force is true, try to unreserve pageblocks even though highatomic
|
|
||||||
* pageblock is exhausted.
|
|
||||||
*/
|
|
||||||
static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
|
|
||||||
bool force)
|
|
||||||
{
|
|
||||||
struct zonelist *zonelist = ac->zonelist;
|
|
||||||
unsigned long flags;
|
|
||||||
struct zoneref *z;
|
|
||||||
struct zone *zone;
|
|
||||||
struct page *page;
|
|
||||||
int order;
|
|
||||||
int ret;
|
|
||||||
bool skip_unreserve_highatomic = false;
|
|
||||||
|
|
||||||
for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
|
|
||||||
ac->nodemask) {
|
|
||||||
/*
|
|
||||||
* Preserve at least one pageblock unless memory pressure
|
|
||||||
* is really high.
|
|
||||||
*/
|
|
||||||
if (!force && zone->nr_reserved_highatomic <=
|
|
||||||
pageblock_nr_pages)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
trace_android_vh_unreserve_highatomic_bypass(force, zone,
|
|
||||||
&skip_unreserve_highatomic);
|
|
||||||
if (skip_unreserve_highatomic)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&zone->lock, flags);
|
|
||||||
for (order = 0; order < NR_PAGE_ORDERS; order++) {
|
|
||||||
struct free_area *area = &(zone->free_area[order]);
|
|
||||||
unsigned long size;
|
|
||||||
|
|
||||||
page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
|
|
||||||
if (!page)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* It should never happen but changes to
|
|
||||||
* locking could inadvertently allow a per-cpu
|
|
||||||
* drain to add pages to MIGRATE_HIGHATOMIC
|
|
||||||
* while unreserving so be safe and watch for
|
|
||||||
* underflows.
|
|
||||||
*/
|
|
||||||
size = max(pageblock_nr_pages, 1UL << order);
|
|
||||||
size = min(size, zone->nr_reserved_highatomic);
|
|
||||||
zone->nr_reserved_highatomic -= size;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Convert to ac->migratetype and avoid the normal
|
|
||||||
* pageblock stealing heuristics. Minimally, the caller
|
|
||||||
* is doing the work and needs the pages. More
|
|
||||||
* importantly, if the block was always converted to
|
|
||||||
* MIGRATE_UNMOVABLE or another type then the number
|
|
||||||
* of pageblocks that cannot be completely freed
|
|
||||||
* may increase.
|
|
||||||
*/
|
|
||||||
if (order < pageblock_order)
|
|
||||||
ret = move_freepages_block(zone, page,
|
|
||||||
MIGRATE_HIGHATOMIC,
|
|
||||||
ac->migratetype);
|
|
||||||
else {
|
|
||||||
move_to_free_list(page, zone, order,
|
|
||||||
MIGRATE_HIGHATOMIC,
|
|
||||||
ac->migratetype);
|
|
||||||
change_pageblock_range(page, order,
|
|
||||||
ac->migratetype);
|
|
||||||
ret = 1;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* Reserving the block(s) already succeeded,
|
|
||||||
* so this should not fail on zone boundaries.
|
|
||||||
*/
|
|
||||||
WARN_ON_ONCE(ret == -1);
|
|
||||||
if (ret > 0) {
|
|
||||||
spin_unlock_irqrestore(&zone->lock, flags);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
spin_unlock_irqrestore(&zone->lock, flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Try to allocate from some fallback migratetype by claiming the entire block,
|
* Try to allocate from some fallback migratetype by claiming the entire block,
|
||||||
@@ -3444,6 +3300,151 @@ out:
|
|||||||
return page;
|
return page;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reserve the pageblock(s) surrounding an allocation request for
|
||||||
|
* exclusive use of high-order atomic allocations if there are no
|
||||||
|
* empty page blocks that contain a page with a suitable order
|
||||||
|
*/
|
||||||
|
static void reserve_highatomic_pageblock(struct page *page, int order,
|
||||||
|
struct zone *zone)
|
||||||
|
{
|
||||||
|
int mt;
|
||||||
|
unsigned long max_managed, flags;
|
||||||
|
bool bypass = false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The number reserved as: minimum is 1 pageblock, maximum is
|
||||||
|
* roughly 1% of a zone. But if 1% of a zone falls below a
|
||||||
|
* pageblock size, then don't reserve any pageblocks.
|
||||||
|
* Check is race-prone but harmless.
|
||||||
|
*/
|
||||||
|
if ((zone_managed_pages(zone) / 100) < pageblock_nr_pages)
|
||||||
|
return;
|
||||||
|
max_managed = ALIGN((zone_managed_pages(zone) / 100), pageblock_nr_pages);
|
||||||
|
if (zone->nr_reserved_highatomic >= max_managed)
|
||||||
|
return;
|
||||||
|
trace_android_vh_reserve_highatomic_bypass(page, &bypass);
|
||||||
|
if (bypass)
|
||||||
|
return;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&zone->lock, flags);
|
||||||
|
|
||||||
|
/* Recheck the nr_reserved_highatomic limit under the lock */
|
||||||
|
if (zone->nr_reserved_highatomic >= max_managed)
|
||||||
|
goto out_unlock;
|
||||||
|
|
||||||
|
/* Yoink! */
|
||||||
|
mt = get_pageblock_migratetype(page);
|
||||||
|
/* Only reserve normal pageblocks (i.e., they can merge with others) */
|
||||||
|
if (!migratetype_is_mergeable(mt))
|
||||||
|
goto out_unlock;
|
||||||
|
|
||||||
|
if (order < pageblock_order) {
|
||||||
|
if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1)
|
||||||
|
goto out_unlock;
|
||||||
|
zone->nr_reserved_highatomic += pageblock_nr_pages;
|
||||||
|
} else {
|
||||||
|
change_pageblock_range(page, order, MIGRATE_HIGHATOMIC);
|
||||||
|
zone->nr_reserved_highatomic += 1 << order;
|
||||||
|
}
|
||||||
|
|
||||||
|
out_unlock:
|
||||||
|
spin_unlock_irqrestore(&zone->lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Used when an allocation is about to fail under memory pressure. This
|
||||||
|
* potentially hurts the reliability of high-order allocations when under
|
||||||
|
* intense memory pressure but failed atomic allocations should be easier
|
||||||
|
* to recover from than an OOM.
|
||||||
|
*
|
||||||
|
* If @force is true, try to unreserve pageblocks even though highatomic
|
||||||
|
* pageblock is exhausted.
|
||||||
|
*/
|
||||||
|
static bool unreserve_highatomic_pageblock(const struct alloc_context *ac,
|
||||||
|
bool force)
|
||||||
|
{
|
||||||
|
struct zonelist *zonelist = ac->zonelist;
|
||||||
|
unsigned long flags;
|
||||||
|
struct zoneref *z;
|
||||||
|
struct zone *zone;
|
||||||
|
struct page *page;
|
||||||
|
int order;
|
||||||
|
int ret;
|
||||||
|
bool skip_unreserve_highatomic = false;
|
||||||
|
|
||||||
|
for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx,
|
||||||
|
ac->nodemask) {
|
||||||
|
/*
|
||||||
|
* Preserve at least one pageblock unless memory pressure
|
||||||
|
* is really high.
|
||||||
|
*/
|
||||||
|
if (!force && zone->nr_reserved_highatomic <=
|
||||||
|
pageblock_nr_pages)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
trace_android_vh_unreserve_highatomic_bypass(force, zone,
|
||||||
|
&skip_unreserve_highatomic);
|
||||||
|
if (skip_unreserve_highatomic)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&zone->lock, flags);
|
||||||
|
for (order = 0; order < NR_PAGE_ORDERS; order++) {
|
||||||
|
struct free_area *area = &(zone->free_area[order]);
|
||||||
|
unsigned long size;
|
||||||
|
|
||||||
|
page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC);
|
||||||
|
if (!page)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* It should never happen but changes to
|
||||||
|
* locking could inadvertently allow a per-cpu
|
||||||
|
* drain to add pages to MIGRATE_HIGHATOMIC
|
||||||
|
* while unreserving so be safe and watch for
|
||||||
|
* underflows.
|
||||||
|
*/
|
||||||
|
size = max(pageblock_nr_pages, 1UL << order);
|
||||||
|
size = min(size, zone->nr_reserved_highatomic);
|
||||||
|
zone->nr_reserved_highatomic -= size;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert to ac->migratetype and avoid the normal
|
||||||
|
* pageblock stealing heuristics. Minimally, the caller
|
||||||
|
* is doing the work and needs the pages. More
|
||||||
|
* importantly, if the block was always converted to
|
||||||
|
* MIGRATE_UNMOVABLE or another type then the number
|
||||||
|
* of pageblocks that cannot be completely freed
|
||||||
|
* may increase.
|
||||||
|
*/
|
||||||
|
if (order < pageblock_order)
|
||||||
|
ret = move_freepages_block(zone, page,
|
||||||
|
MIGRATE_HIGHATOMIC,
|
||||||
|
ac->migratetype);
|
||||||
|
else {
|
||||||
|
move_to_free_list(page, zone, order,
|
||||||
|
MIGRATE_HIGHATOMIC,
|
||||||
|
ac->migratetype);
|
||||||
|
change_pageblock_range(page, order,
|
||||||
|
ac->migratetype);
|
||||||
|
ret = 1;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Reserving the block(s) already succeeded,
|
||||||
|
* so this should not fail on zone boundaries.
|
||||||
|
*/
|
||||||
|
WARN_ON_ONCE(ret == -1);
|
||||||
|
if (ret > 0) {
|
||||||
|
spin_unlock_irqrestore(&zone->lock, flags);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
spin_unlock_irqrestore(&zone->lock, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static inline long __zone_watermark_unusable_free(struct zone *z,
|
static inline long __zone_watermark_unusable_free(struct zone *z,
|
||||||
unsigned int order, unsigned int alloc_flags)
|
unsigned int order, unsigned int alloc_flags)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user