Merge tag 'mm-hotfixes-stable-2025-05-17-09-41' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

+12

MAINTAINERS

··· 15549 15549 F: include/linux/execmem.h 15550 15550 F: mm/execmem.c 15551 15551 15552 + MEMORY MANAGEMENT - GUP (GET USER PAGES) 15553 + M: Andrew Morton <akpm@linux-foundation.org> 15554 + M: David Hildenbrand <david@redhat.com> 15555 + R: Jason Gunthorpe <jgg@nvidia.com> 15556 + R: John Hubbard <jhubbard@nvidia.com> 15557 + R: Peter Xu <peterx@redhat.com> 15558 + L: linux-mm@kvack.org 15559 + S: Maintained 15560 + W: http://www.linux-mm.org 15561 + T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm 15562 + F: mm/gup.c 15563 + 15552 15564 MEMORY MANAGEMENT - NUMA MEMBLOCKS AND NUMA EMULATION 15553 15565 M: Andrew Morton <akpm@linux-foundation.org> 15554 15566 M: Mike Rapoport <rppt@kernel.org>

+8

include/linux/pgalloc_tag.h

··· 188 188 return tag; 189 189 } 190 190 191 + static inline struct alloc_tag *pgalloc_tag_get(struct page *page) 192 + { 193 + if (mem_alloc_profiling_enabled()) 194 + return __pgalloc_tag_get(page); 195 + return NULL; 196 + } 197 + 191 198 void pgalloc_tag_split(struct folio *folio, int old_order, int new_order); 192 199 void pgalloc_tag_swap(struct folio *new, struct folio *old); 193 200 ··· 206 199 static inline void alloc_tag_sec_init(void) {} 207 200 static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {} 208 201 static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {} 202 + static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; } 209 203 210 204 #endif /* CONFIG_MEM_ALLOC_PROFILING */ 211 205

+5 -4

kernel/fork.c

··· 498 498 vma_numab_state_init(new); 499 499 dup_anon_vma_name(orig, new); 500 500 501 - /* track_pfn_copy() will later take care of copying internal state. */ 502 - if (unlikely(new->vm_flags & VM_PFNMAP)) 503 - untrack_pfn_clear(new); 504 - 505 501 return new; 506 502 } 507 503 ··· 668 672 tmp = vm_area_dup(mpnt); 669 673 if (!tmp) 670 674 goto fail_nomem; 675 + 676 + /* track_pfn_copy() will later take care of copying internal state. */ 677 + if (unlikely(tmp->vm_flags & VM_PFNMAP)) 678 + untrack_pfn_clear(tmp); 679 + 671 680 retval = vma_dup_policy(mpnt, tmp); 672 681 if (retval) 673 682 goto fail_nomem_policy;

+22 -6

mm/hugetlb.c

··· 3010 3010 struct hugepage_subpool *spool = subpool_vma(vma); 3011 3011 struct hstate *h = hstate_vma(vma); 3012 3012 struct folio *folio; 3013 - long retval, gbl_chg; 3013 + long retval, gbl_chg, gbl_reserve; 3014 3014 map_chg_state map_chg; 3015 3015 int ret, idx; 3016 3016 struct hugetlb_cgroup *h_cg = NULL; ··· 3163 3163 hugetlb_cgroup_uncharge_cgroup_rsvd(idx, pages_per_huge_page(h), 3164 3164 h_cg); 3165 3165 out_subpool_put: 3166 - if (map_chg) 3167 - hugepage_subpool_put_pages(spool, 1); 3166 + /* 3167 + * put page to subpool iff the quota of subpool's rsv_hpages is used 3168 + * during hugepage_subpool_get_pages. 3169 + */ 3170 + if (map_chg && !gbl_chg) { 3171 + gbl_reserve = hugepage_subpool_put_pages(spool, 1); 3172 + hugetlb_acct_memory(h, -gbl_reserve); 3173 + } 3174 + 3175 + 3168 3176 out_end_reservation: 3169 3177 if (map_chg != MAP_CHG_ENFORCED) 3170 3178 vma_end_reservation(h, vma, addr); ··· 7247 7239 struct vm_area_struct *vma, 7248 7240 vm_flags_t vm_flags) 7249 7241 { 7250 - long chg = -1, add = -1; 7242 + long chg = -1, add = -1, spool_resv, gbl_resv; 7251 7243 struct hstate *h = hstate_inode(inode); 7252 7244 struct hugepage_subpool *spool = subpool_inode(inode); 7253 7245 struct resv_map *resv_map; ··· 7382 7374 return true; 7383 7375 7384 7376 out_put_pages: 7385 - /* put back original number of pages, chg */ 7386 - (void)hugepage_subpool_put_pages(spool, chg); 7377 + spool_resv = chg - gbl_reserve; 7378 + if (spool_resv) { 7379 + /* put sub pool's reservation back, chg - gbl_reserve */ 7380 + gbl_resv = hugepage_subpool_put_pages(spool, spool_resv); 7381 + /* 7382 + * subpool's reserved pages can not be put back due to race, 7383 + * return to hstate. 7384 + */ 7385 + hugetlb_acct_memory(h, -gbl_resv); 7386 + } 7387 7387 out_uncharge_cgroup: 7388 7388 hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h), 7389 7389 chg * pages_per_huge_page(h), h_cg);

-1

mm/internal.h

··· 1590 1590 1591 1591 #ifdef CONFIG_UNACCEPTED_MEMORY 1592 1592 void accept_page(struct page *page); 1593 - void unaccepted_cleanup_work(struct work_struct *work); 1594 1593 #else /* CONFIG_UNACCEPTED_MEMORY */ 1595 1594 static inline void accept_page(struct page *page) 1596 1595 {

+1 -1

mm/memory.c

··· 3751 3751 3752 3752 /* Stabilize the mapcount vs. refcount and recheck. */ 3753 3753 folio_lock_large_mapcount(folio); 3754 - VM_WARN_ON_ONCE(folio_large_mapcount(folio) < folio_ref_count(folio)); 3754 + VM_WARN_ON_ONCE_FOLIO(folio_large_mapcount(folio) > folio_ref_count(folio), folio); 3755 3755 3756 3756 if (folio_test_large_maybe_mapped_shared(folio)) 3757 3757 goto unlock;

-1

mm/mm_init.c

··· 1441 1441 1442 1442 #ifdef CONFIG_UNACCEPTED_MEMORY 1443 1443 INIT_LIST_HEAD(&zone->unaccepted_pages); 1444 - INIT_WORK(&zone->unaccepted_cleanup, unaccepted_cleanup_work); 1445 1444 #endif 1446 1445 } 1447 1446

+20 -68

mm/page_alloc.c

··· 290 290 #endif 291 291 292 292 static bool page_contains_unaccepted(struct page *page, unsigned int order); 293 - static bool cond_accept_memory(struct zone *zone, unsigned int order); 293 + static bool cond_accept_memory(struct zone *zone, unsigned int order, 294 + int alloc_flags); 294 295 static bool __free_unaccepted(struct page *page); 295 296 296 297 int page_group_by_mobility_disabled __read_mostly; ··· 1152 1151 __pgalloc_tag_sub(page, nr); 1153 1152 } 1154 1153 1155 - static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) 1154 + /* When tag is not NULL, assuming mem_alloc_profiling_enabled */ 1155 + static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) 1156 1156 { 1157 - struct alloc_tag *tag; 1158 - 1159 - if (!mem_alloc_profiling_enabled()) 1160 - return; 1161 - 1162 - tag = __pgalloc_tag_get(page); 1163 1157 if (tag) 1164 1158 this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr); 1165 1159 } ··· 1164 1168 static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, 1165 1169 unsigned int nr) {} 1166 1170 static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} 1167 - static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) {} 1171 + static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {} 1168 1172 1169 1173 #endif /* CONFIG_MEM_ALLOC_PROFILING */ 1170 1174 ··· 3612 3616 } 3613 3617 } 3614 3618 3615 - cond_accept_memory(zone, order); 3619 + cond_accept_memory(zone, order, alloc_flags); 3616 3620 3617 3621 /* 3618 3622 * Detect whether the number of free pages is below high ··· 3639 3643 gfp_mask)) { 3640 3644 int ret; 3641 3645 3642 - if (cond_accept_memory(zone, order)) 3646 + if (cond_accept_memory(zone, order, alloc_flags)) 3643 3647 goto try_this_zone; 3644 3648 3645 3649 /* ··· 3692 3696 3693 3697 return page; 3694 3698 } else { 3695 - if (cond_accept_memory(zone, order)) 3699 + if (cond_accept_memory(zone, order, alloc_flags)) 3696 3700 goto try_this_zone; 3697 3701 3698 3702 /* Try again if zone has deferred pages */ ··· 4845 4849 goto failed; 4846 4850 } 4847 4851 4848 - cond_accept_memory(zone, 0); 4852 + cond_accept_memory(zone, 0, alloc_flags); 4849 4853 retry_this_zone: 4850 4854 mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK) + nr_pages; 4851 4855 if (zone_watermark_fast(zone, 0, mark, ··· 4854 4858 break; 4855 4859 } 4856 4860 4857 - if (cond_accept_memory(zone, 0)) 4861 + if (cond_accept_memory(zone, 0, alloc_flags)) 4858 4862 goto retry_this_zone; 4859 4863 4860 4864 /* Try again if zone has deferred pages */ ··· 5061 5065 { 5062 5066 /* get PageHead before we drop reference */ 5063 5067 int head = PageHead(page); 5068 + /* get alloc tag in case the page is released by others */ 5069 + struct alloc_tag *tag = pgalloc_tag_get(page); 5064 5070 5065 5071 if (put_page_testzero(page)) 5066 5072 __free_frozen_pages(page, order, fpi_flags); 5067 5073 else if (!head) { 5068 - pgalloc_tag_sub_pages(page, (1 << order) - 1); 5074 + pgalloc_tag_sub_pages(tag, (1 << order) - 1); 5069 5075 while (order-- > 0) 5070 5076 __free_frozen_pages(page + (1 << order), order, 5071 5077 fpi_flags); ··· 7172 7174 7173 7175 #ifdef CONFIG_UNACCEPTED_MEMORY 7174 7176 7175 - /* Counts number of zones with unaccepted pages. */ 7176 - static DEFINE_STATIC_KEY_FALSE(zones_with_unaccepted_pages); 7177 - 7178 7177 static bool lazy_accept = true; 7179 - 7180 - void unaccepted_cleanup_work(struct work_struct *work) 7181 - { 7182 - static_branch_dec(&zones_with_unaccepted_pages); 7183 - } 7184 7178 7185 7179 static int __init accept_memory_parse(char *p) 7186 7180 { ··· 7198 7208 static void __accept_page(struct zone *zone, unsigned long *flags, 7199 7209 struct page *page) 7200 7210 { 7201 - bool last; 7202 - 7203 7211 list_del(&page->lru); 7204 - last = list_empty(&zone->unaccepted_pages); 7205 - 7206 7212 account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); 7207 7213 __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES); 7208 7214 __ClearPageUnaccepted(page); ··· 7207 7221 accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER); 7208 7222 7209 7223 __free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL); 7210 - 7211 - if (last) { 7212 - /* 7213 - * There are two corner cases: 7214 - * 7215 - * - If allocation occurs during the CPU bring up, 7216 - * static_branch_dec() cannot be used directly as 7217 - * it causes a deadlock on cpu_hotplug_lock. 7218 - * 7219 - * Instead, use schedule_work() to prevent deadlock. 7220 - * 7221 - * - If allocation occurs before workqueues are initialized, 7222 - * static_branch_dec() should be called directly. 7223 - * 7224 - * Workqueues are initialized before CPU bring up, so this 7225 - * will not conflict with the first scenario. 7226 - */ 7227 - if (system_wq) 7228 - schedule_work(&zone->unaccepted_cleanup); 7229 - else 7230 - unaccepted_cleanup_work(&zone->unaccepted_cleanup); 7231 - } 7232 7224 } 7233 7225 7234 7226 void accept_page(struct page *page) ··· 7243 7279 return true; 7244 7280 } 7245 7281 7246 - static inline bool has_unaccepted_memory(void) 7247 - { 7248 - return static_branch_unlikely(&zones_with_unaccepted_pages); 7249 - } 7250 - 7251 - static bool cond_accept_memory(struct zone *zone, unsigned int order) 7282 + static bool cond_accept_memory(struct zone *zone, unsigned int order, 7283 + int alloc_flags) 7252 7284 { 7253 7285 long to_accept, wmark; 7254 7286 bool ret = false; 7255 7287 7256 - if (!has_unaccepted_memory()) 7288 + if (list_empty(&zone->unaccepted_pages)) 7257 7289 return false; 7258 7290 7259 - if (list_empty(&zone->unaccepted_pages)) 7291 + /* Bailout, since try_to_accept_memory_one() needs to take a lock */ 7292 + if (alloc_flags & ALLOC_TRYLOCK) 7260 7293 return false; 7261 7294 7262 7295 wmark = promo_wmark_pages(zone); ··· 7286 7325 { 7287 7326 struct zone *zone = page_zone(page); 7288 7327 unsigned long flags; 7289 - bool first = false; 7290 7328 7291 7329 if (!lazy_accept) 7292 7330 return false; 7293 7331 7294 7332 spin_lock_irqsave(&zone->lock, flags); 7295 - first = list_empty(&zone->unaccepted_pages); 7296 7333 list_add_tail(&page->lru, &zone->unaccepted_pages); 7297 7334 account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); 7298 7335 __mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES); 7299 7336 __SetPageUnaccepted(page); 7300 7337 spin_unlock_irqrestore(&zone->lock, flags); 7301 - 7302 - if (first) 7303 - static_branch_inc(&zones_with_unaccepted_pages); 7304 7338 7305 7339 return true; 7306 7340 } ··· 7307 7351 return false; 7308 7352 } 7309 7353 7310 - static bool cond_accept_memory(struct zone *zone, unsigned int order) 7354 + static bool cond_accept_memory(struct zone *zone, unsigned int order, 7355 + int alloc_flags) 7311 7356 { 7312 7357 return false; 7313 7358 } ··· 7379 7422 if (!pcp_allowed_order(order)) 7380 7423 return NULL; 7381 7424 7382 - #ifdef CONFIG_UNACCEPTED_MEMORY 7383 - /* Bailout, since try_to_accept_memory_one() needs to take a lock */ 7384 - if (has_unaccepted_memory()) 7385 - return NULL; 7386 - #endif 7387 7425 /* Bailout, since _deferred_grow_zone() needs to take a lock */ 7388 7426 if (deferred_pages_enabled()) 7389 7427 return NULL;

+10 -2

mm/userfaultfd.c

··· 1064 1064 src_folio->index = linear_page_index(dst_vma, dst_addr); 1065 1065 1066 1066 orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot); 1067 - /* Follow mremap() behavior and treat the entry dirty after the move */ 1068 - orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte), dst_vma); 1067 + /* Set soft dirty bit so userspace can notice the pte was moved */ 1068 + #ifdef CONFIG_MEM_SOFT_DIRTY 1069 + orig_dst_pte = pte_mksoft_dirty(orig_dst_pte); 1070 + #endif 1071 + if (pte_dirty(orig_src_pte)) 1072 + orig_dst_pte = pte_mkdirty(orig_dst_pte); 1073 + orig_dst_pte = pte_mkwrite(orig_dst_pte, dst_vma); 1069 1074 1070 1075 set_pte_at(mm, dst_addr, dst_pte, orig_dst_pte); 1071 1076 out: ··· 1105 1100 } 1106 1101 1107 1102 orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); 1103 + #ifdef CONFIG_MEM_SOFT_DIRTY 1104 + orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte); 1105 + #endif 1108 1106 set_pte_at(mm, dst_addr, dst_pte, orig_src_pte); 1109 1107 double_pt_unlock(dst_ptl, src_ptl); 1110 1108

+4 -4

mm/zsmalloc.c

··· 1243 1243 class = zspage_class(pool, zspage); 1244 1244 off = offset_in_page(class->size * obj_idx); 1245 1245 1246 - if (off + class->size <= PAGE_SIZE) { 1246 + if (!ZsHugePage(zspage)) 1247 + off += ZS_HANDLE_SIZE; 1248 + 1249 + if (off + mem_len <= PAGE_SIZE) { 1247 1250 /* this object is contained entirely within a page */ 1248 1251 void *dst = kmap_local_zpdesc(zpdesc); 1249 1252 1250 - if (!ZsHugePage(zspage)) 1251 - off += ZS_HANDLE_SIZE; 1252 1253 memcpy(dst + off, handle_mem, mem_len); 1253 1254 kunmap_local(dst); 1254 1255 } else { 1255 1256 /* this object spans two pages */ 1256 1257 size_t sizes[2]; 1257 1258 1258 - off += ZS_HANDLE_SIZE; 1259 1259 sizes[0] = PAGE_SIZE - off; 1260 1260 sizes[1] = mem_len - sizes[0]; 1261 1261