mm/page_alloc: fix race condition in unaccepted memory handling

The page allocator tracks the number of zones that have unaccepted memory
using static_branch_inc/dec(), and checks that static branch in hot paths
to determine whether it needs to deal with unaccepted memory.
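
Simplified, the removed scheme looked like this (condensed from the code
deleted below; not the exact upstream layout):

  /* Counts number of zones with unaccepted pages. */
  static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages);

  /* Hot path: skip unaccepted-memory handling if no zone has any. */
  if (!static_branch_unlikely(&zones_with_unaccepted_pages))
          return false;

  /* Slow paths: flip the key when a zone's list becomes (non-)empty. */
  if (first)      /* first page added to zone->unaccepted_pages */
          static_branch_inc(&zones_with_unaccepted_pages);
  ...
  if (last)       /* last page removed from zone->unaccepted_pages */
          static_branch_dec(&zones_with_unaccepted_pages);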

Borislav and Thomas pointed out that the tracking is racy: operations on
static_branch are not serialized against adding/removing unaccepted pages
to/from the zone.

Sanity checks inside the static_branch machinery detect it:

WARNING: CPU: 0 PID: 10 at kernel/jump_label.c:276 __static_key_slow_dec_cpuslocked+0x8e/0xa0

The comment around the WARN() explains the problem:

	/*
	 * Warn about the '-1' case though; since that means a
	 * decrement is concurrent with a first (0->1) increment. IOW
	 * people are trying to disable something that wasn't yet fully
	 * enabled. This suggests an ordering problem on the user side.
	 */
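
That is what happens here: 'first' and 'last' are sampled under
zone->lock, but the static branch is flipped only after the lock is
dropped (the decrement even deferred to a workqueue). One possible
interleaving, starting with an empty unaccepted list and the key at 0
(illustrative sketch):

  CPU0                                    CPU1
  ----                                    ----
  __free_unaccepted():
    first = list_empty();   /* true */
    list_add_tail(&page->lru, ...);
    spin_unlock_irqrestore();
                                          __accept_page():
                                            list_del(&page->lru);
                                            last = list_empty();  /* true */
                                            spin_unlock_irqrestore();
  static_branch_inc();  /* 0->1 */        unaccepted_cleanup_work():
                                            static_branch_dec();
                                          /* dec concurrent with the first
                                             0->1 inc: triggers the WARN */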

The effect of this static_branch optimization is only visible in
microbenchmarks.

Instead of adding more complexity around it, remove it altogether.

Link: https://lkml.kernel.org/r/20250506133207.1009676-1-kirill.shutemov@linux.intel.com
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory")
Link: https://lore.kernel.org/all/20250506092445.GBaBnVXXyvnazly6iF@fat_crate.local
Reported-by: Borislav Petkov <bp@alien8.de>
Tested-by: Borislav Petkov (AMD) <bp@alien8.de>
Reported-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Brendan Jackman <jackmanb@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: <stable@vger.kernel.org> [6.5+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

 mm/internal.h   |  1 -
 mm/mm_init.c    |  1 -
 mm/page_alloc.c | 47 -----------------------------------------------
 3 files changed, 49 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1590,7 +1590,6 @@
 
 #ifdef CONFIG_UNACCEPTED_MEMORY
 void accept_page(struct page *page);
-void unaccepted_cleanup_work(struct work_struct *work);
 #else /* CONFIG_UNACCEPTED_MEMORY */
 static inline void accept_page(struct page *page)
 {
diff --git a/mm/mm_init.c b/mm/mm_init.c
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -1441,7 +1441,6 @@
 
 #ifdef CONFIG_UNACCEPTED_MEMORY
 	INIT_LIST_HEAD(&zone->unaccepted_pages);
-	INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work);
 #endif
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7172,15 +7172,7 @@
 
 #ifdef CONFIG_UNACCEPTED_MEMORY
 
-/* Counts number of zones with unaccepted pages. */
-static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages);
-
 static bool lazy_accept = true;
-
-void unaccepted_cleanup_work(struct work_struct *work)
-{
-	static_branch_dec(&zones_with_unaccepted_pages);
-}
 
 static int __init accept_memory_parse(char *p)
 {
@@ -7214,11 +7206,7 @@
 static void __accept_page(struct zone *zone, unsigned long *flags,
 			  struct page *page)
 {
-	bool last;
-
 	list_del(&page->lru);
-	last = list_empty(&zone->unaccepted_pages);
-
 	account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
 	__mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
 	__ClearPageUnaccepted(page);
@@ -7231,28 +7219,6 @@
 	accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER);
 
 	__free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL);
-
-	if (last) {
-		/*
-		 * There are two corner cases:
-		 *
-		 * - If allocation occurs during the CPU bring up,
-		 *   static_branch_dec() cannot be used directly as
-		 *   it causes a deadlock on cpu_hotplug_lock.
-		 *
-		 *   Instead, use schedule_work() to prevent deadlock.
-		 *
-		 * - If allocation occurs before workqueues are initialized,
-		 *   static_branch_dec() should be called directly.
-		 *
-		 *   Workqueues are initialized before CPU bring up, so this
-		 *   will not conflict with the first scenario.
-		 */
-		if (system_wq)
-			schedule_work(&zone->unaccepted_cleanup);
-		else
-			unaccepted_cleanup_work(&zone->unaccepted_cleanup);
-	}
 }
 
 void accept_page(struct page *page)
@@ -7311,19 +7277,11 @@
 	return true;
 }
 
-static inline bool has_unaccepted_memory(void)
-{
-	return static_branch_unlikely(&zones_with_unaccepted_pages);
-}
-
 static bool cond_accept_memory(struct zone *zone, unsigned int order,
 			       int alloc_flags)
 {
 	long to_accept, wmark;
 	bool ret = false;
-
-	if (!has_unaccepted_memory())
-		return false;
 
 	if (list_empty(&zone->unaccepted_pages))
 		return false;
@@ -7370,21 +7328,16 @@
 {
 	struct zone *zone = page_zone(page);
 	unsigned long flags;
-	bool first = false;
 
 	if (!lazy_accept)
 		return false;
 
 	spin_lock_irqsave(&zone->lock, flags);
-	first = list_empty(&zone->unaccepted_pages);
 	list_add_tail(&page->lru, &zone->unaccepted_pages);
 	account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
 	__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
 	__SetPageUnaccepted(page);
 	spin_unlock_irqrestore(&zone->lock, flags);
-
-	if (first)
-		static_branch_inc(&zones_with_unaccepted_pages);
 
 	return true;
 }