Merge tag 'mm-hotfixes-stable-2024-11-03-10-50' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
"17 hotfixes. 9 are cc:stable. 13 are MM and 4 are non-MM.

The usual collection of singletons - please see the changelogs"

* tag 'mm-hotfixes-stable-2024-11-03-10-50' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
mm: multi-gen LRU: use {ptep,pmdp}_clear_young_notify()
mm: multi-gen LRU: remove MM_LEAF_OLD and MM_NONLEAF_TOTAL stats
mm, mmap: limit THP alignment of anonymous mappings to PMD-aligned sizes
mm: shrinker: avoid memleak in alloc_shrinker_info
.mailmap: update e-mail address for Eugen Hristev
vmscan,migrate: fix page count imbalance on node stats when demoting pages
mailmap: update Jarkko's email addresses
mm: allow set/clear page_type again
nilfs2: fix potential deadlock with newly created symlinks
Squashfs: fix variable overflow in squashfs_readpage_block
kasan: remove vmalloc_percpu test
tools/mm: -Werror fixes in page-types/slabinfo
mm, swap: avoid over reclaim of full clusters
mm: fix PSWPIN counter for large folios swap-in
mm: avoid VM_BUG_ON when try to map an anon large folio to zero page.
mm/codetag: fix null pointer check logic for ref and tag
mm/gup: stop leaking pinned pages in low memory conditions

+3 -2
.mailmap
··· 199 199 Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com> 200 200 Enric Balletbo i Serra <eballetbo@kernel.org> <eballetbo@iseebcn.com> 201 201 Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com> 202 - Eugen Hristev <eugen.hristev@collabora.com> <eugen.hristev@microchip.com> 202 + Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@microchip.com> 203 + Eugen Hristev <eugen.hristev@linaro.org> <eugen.hristev@collabora.com> 203 204 Evgeniy Polyakov <johnpol@2ka.mipt.ru> 204 205 Ezequiel Garcia <ezequiel@vanguardiasur.com.ar> <ezequiel@collabora.com> 205 206 Faith Ekstrand <faith.ekstrand@collabora.com> <jason@jlekstrand.net> ··· 283 282 Jan Kuliga <jtkuliga.kdev@gmail.com> <jankul@alatek.krakow.pl> 284 283 Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com> 285 284 Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com> 286 - Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@tuni.fi> 285 + Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@parity.io> 287 286 Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com> 288 287 Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com> 289 288 Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
+3
fs/nilfs2/namei.c
··· 157 157 /* slow symlink */ 158 158 inode->i_op = &nilfs_symlink_inode_operations; 159 159 inode_nohighmem(inode); 160 + mapping_set_gfp_mask(inode->i_mapping, 161 + mapping_gfp_constraint(inode->i_mapping, 162 + ~__GFP_FS)); 160 163 inode->i_mapping->a_ops = &nilfs_aops; 161 164 err = page_symlink(inode, symname, l); 162 165 if (err)
+5 -4
fs/squashfs/file_direct.c
··· 30 30 int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1; 31 31 loff_t start_index = folio->index & ~mask; 32 32 loff_t end_index = start_index | mask; 33 - int i, n, pages, bytes, res = -ENOMEM; 33 + loff_t index; 34 + int i, pages, bytes, res = -ENOMEM; 34 35 struct page **page, *last_page; 35 36 struct squashfs_page_actor *actor; 36 37 void *pageaddr; ··· 46 45 return res; 47 46 48 47 /* Try to grab all the pages covered by the Squashfs block */ 49 - for (i = 0, n = start_index; n <= end_index; n++) { 50 - page[i] = (n == folio->index) ? target_page : 51 - grab_cache_page_nowait(target_page->mapping, n); 48 + for (i = 0, index = start_index; index <= end_index; index++) { 49 + page[i] = (index == folio->index) ? target_page : 50 + grab_cache_page_nowait(target_page->mapping, index); 52 51 53 52 if (page[i] == NULL) 54 53 continue;
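The Squashfs hunk widens the page-index loop variable from int to loff_t, so indices computed from very large file offsets can no longer wrap. A minimal user-space sketch of the wraparound it guards against (4 KiB pages and a ~9 TiB offset are assumed for illustration only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int64_t offset = (int64_t)9 << 40;	/* assumed ~9 TiB file offset */
	int64_t index = offset >> 12;		/* page index, as a loff_t would keep it */
	int index32 = (int)index;		/* what the old int variable would hold */

	printf("64-bit page index: %lld\n", (long long)index);
	printf("32-bit page index: %d\n", index32);	/* typically wraps negative */
	return 0;
}
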
+10 -6
include/linux/alloc_tag.h
··· 135 135 #endif 136 136 137 137 /* Caller should verify both ref and tag to be valid */ 138 - static inline void __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) 138 + static inline bool __alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) 139 139 { 140 140 alloc_tag_add_check(ref, tag); 141 141 if (!ref || !tag) 142 - return; 142 + return false; 143 143 144 144 ref->ct = &tag->ct; 145 + return true; 145 146 } 146 147 147 - static inline void alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) 148 + static inline bool alloc_tag_ref_set(union codetag_ref *ref, struct alloc_tag *tag) 148 149 { 149 - __alloc_tag_ref_set(ref, tag); 150 + if (unlikely(!__alloc_tag_ref_set(ref, tag))) 151 + return false; 152 + 150 153 /* 151 154 * We need in increment the call counter every time we have a new 152 155 * allocation or when we split a large allocation into smaller ones. ··· 157 154 * counter because when we free each part the counter will be decremented. 158 155 */ 159 156 this_cpu_inc(tag->counters->calls); 157 + return true; 160 158 } 161 159 162 160 static inline void alloc_tag_add(union codetag_ref *ref, struct alloc_tag *tag, size_t bytes) 163 161 { 164 - alloc_tag_ref_set(ref, tag); 165 - this_cpu_add(tag->counters->bytes, bytes); 162 + if (likely(alloc_tag_ref_set(ref, tag))) 163 + this_cpu_add(tag->counters->bytes, bytes); 166 164 } 167 165 168 166 static inline void alloc_tag_sub(union codetag_ref *ref, size_t bytes)
+3 -4
include/linux/mmzone.h
··· 458 458 459 459 enum { 460 460 MM_LEAF_TOTAL, /* total leaf entries */ 461 - MM_LEAF_OLD, /* old leaf entries */ 462 461 MM_LEAF_YOUNG, /* young leaf entries */ 463 - MM_NONLEAF_TOTAL, /* total non-leaf entries */ 464 462 MM_NONLEAF_FOUND, /* non-leaf entries found in Bloom filters */ 465 463 MM_NONLEAF_ADDED, /* non-leaf entries added to Bloom filters */ 466 464 NR_MM_STATS ··· 555 557 556 558 void lru_gen_init_pgdat(struct pglist_data *pgdat); 557 559 void lru_gen_init_lruvec(struct lruvec *lruvec); 558 - void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); 560 + bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw); 559 561 560 562 void lru_gen_init_memcg(struct mem_cgroup *memcg); 561 563 void lru_gen_exit_memcg(struct mem_cgroup *memcg); ··· 574 576 { 575 577 } 576 578 577 - static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) 579 + static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw) 578 580 { 581 + return false; 579 582 } 580 583 581 584 static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
+8
include/linux/page-flags.h
··· 975 975 } \ 976 976 static __always_inline void __folio_set_##fname(struct folio *folio) \ 977 977 { \ 978 + if (folio_test_##fname(folio)) \ 979 + return; \ 978 980 VM_BUG_ON_FOLIO(data_race(folio->page.page_type) != UINT_MAX, \ 979 981 folio); \ 980 982 folio->page.page_type = (unsigned int)PGTY_##lname << 24; \ 981 983 } \ 982 984 static __always_inline void __folio_clear_##fname(struct folio *folio) \ 983 985 { \ 986 + if (folio->page.page_type == UINT_MAX) \ 987 + return; \ 984 988 VM_BUG_ON_FOLIO(!folio_test_##fname(folio), folio); \ 985 989 folio->page.page_type = UINT_MAX; \ 986 990 } ··· 997 993 } \ 998 994 static __always_inline void __SetPage##uname(struct page *page) \ 999 995 { \ 996 + if (Page##uname(page)) \ 997 + return; \ 1000 998 VM_BUG_ON_PAGE(data_race(page->page_type) != UINT_MAX, page); \ 1001 999 page->page_type = (unsigned int)PGTY_##lname << 24; \ 1002 1000 } \ 1003 1001 static __always_inline void __ClearPage##uname(struct page *page) \ 1004 1002 { \ 1003 + if (page->page_type == UINT_MAX) \ 1004 + return; \ 1005 1005 VM_BUG_ON_PAGE(!Page##uname(page), page); \ 1006 1006 page->page_type = UINT_MAX; \ 1007 1007 }
+1
include/linux/swap.h
··· 335 335 * list. 336 336 */ 337 337 struct work_struct discard_work; /* discard worker */ 338 + struct work_struct reclaim_work; /* reclaim worker */ 338 339 struct list_head discard_clusters; /* discard clusters list */ 339 340 struct plist_node avail_lists[]; /* 340 341 * entries in swap_avail_heads, one
+19 -14
mm/gup.c
··· 2394 2394 } 2395 2395 2396 2396 /* 2397 - * Check whether all folios are *allowed* to be pinned indefinitely (longterm). 2397 + * Check whether all folios are *allowed* to be pinned indefinitely (long term). 2398 2398 * Rather confusingly, all folios in the range are required to be pinned via 2399 2399 * FOLL_PIN, before calling this routine. 2400 2400 * 2401 - * If any folios in the range are not allowed to be pinned, then this routine 2402 - * will migrate those folios away, unpin all the folios in the range and return 2403 - * -EAGAIN. The caller should re-pin the entire range with FOLL_PIN and then 2404 - * call this routine again. 2401 + * Return values: 2405 2402 * 2406 - * If an error other than -EAGAIN occurs, this indicates a migration failure. 2407 - * The caller should give up, and propagate the error back up the call stack. 2408 - * 2409 - * If everything is OK and all folios in the range are allowed to be pinned, 2403 + * 0: if everything is OK and all folios in the range are allowed to be pinned, 2410 2404 * then this routine leaves all folios pinned and returns zero for success. 2405 + * 2406 + * -EAGAIN: if any folios in the range are not allowed to be pinned, then this 2407 + * routine will migrate those folios away, unpin all the folios in the range. If 2408 + * migration of the entire set of folios succeeds, then -EAGAIN is returned. The 2409 + * caller should re-pin the entire range with FOLL_PIN and then call this 2410 + * routine again. 2411 + * 2412 + * -ENOMEM, or any other -errno: if an error *other* than -EAGAIN occurs, this 2413 + * indicates a migration failure. The caller should give up, and propagate the 2414 + * error back up the call stack. The caller does not need to unpin any folios in 2415 + * that case, because this routine will do the unpinning. 2411 2416 */ 2412 2417 static long check_and_migrate_movable_folios(unsigned long nr_folios, 2413 2418 struct folio **folios) ··· 2430 2425 } 2431 2426 2432 2427 /* 2433 - * This routine just converts all the pages in the @pages array to folios and 2434 - * calls check_and_migrate_movable_folios() to do the heavy lifting. 2435 - * 2436 - * Please see the check_and_migrate_movable_folios() documentation for details. 2428 + * Return values and behavior are the same as those for 2429 + * check_and_migrate_movable_folios(). 2437 2430 */ 2438 2431 static long check_and_migrate_movable_pages(unsigned long nr_pages, 2439 2432 struct page **pages) ··· 2440 2437 long i, ret; 2441 2438 2442 2439 folios = kmalloc_array(nr_pages, sizeof(*folios), GFP_KERNEL); 2443 - if (!folios) 2440 + if (!folios) { 2441 + unpin_user_pages(pages, nr_pages); 2444 2442 return -ENOMEM; 2443 + } 2445 2444 2446 2445 for (i = 0; i < nr_pages; i++) 2447 2446 folios[i] = page_folio(pages[i]);
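The reworked comment spells out the calling convention: on -EAGAIN the folios were migrated and already unpinned, so the caller should pin the range again and retry; on any other error the routine has likewise done the unpinning, so the caller only propagates the failure. A sketch of that retry loop follows, where repin_range() is a hypothetical stand-in for the FOLL_PIN pinning step rather than a real helper:

/*
 * Illustrative only: shows the retry contract documented above.
 * repin_range() is a hypothetical placeholder for the FOLL_PIN step;
 * the real caller lives in mm/gup.c.
 */
static long pin_range_longterm(unsigned long nr_pages, struct page **pages)
{
	long ret;

	do {
		ret = repin_range(nr_pages, pages);
		if (ret < 0)
			return ret;

		/*
		 * -EAGAIN: folios were migrated and unpinned, pin again.
		 * Any other error: also already unpinned, just give up.
		 */
		ret = check_and_migrate_movable_pages(nr_pages, pages);
	} while (ret == -EAGAIN);

	return ret;
}
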
-27
mm/kasan/kasan_test_c.c
··· 1810 1810 free_pages((unsigned long)p_ptr, 1); 1811 1811 } 1812 1812 1813 - static void vmalloc_percpu(struct kunit *test) 1814 - { 1815 - char __percpu *ptr; 1816 - int cpu; 1817 - 1818 - /* 1819 - * This test is specifically crafted for the software tag-based mode, 1820 - * the only tag-based mode that poisons percpu mappings. 1821 - */ 1822 - KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS); 1823 - 1824 - ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE); 1825 - 1826 - for_each_possible_cpu(cpu) { 1827 - char *c_ptr = per_cpu_ptr(ptr, cpu); 1828 - 1829 - KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN); 1830 - KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL); 1831 - 1832 - /* Make sure that in-bounds accesses don't crash the kernel. */ 1833 - *c_ptr = 0; 1834 - } 1835 - 1836 - free_percpu(ptr); 1837 - } 1838 - 1839 1813 /* 1840 1814 * Check that the assigned pointer tag falls within the [KASAN_TAG_MIN, 1841 1815 * KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based ··· 1997 2023 KUNIT_CASE(vmalloc_oob), 1998 2024 KUNIT_CASE(vmap_tags), 1999 2025 KUNIT_CASE(vm_map_ram_tags), 2000 - KUNIT_CASE(vmalloc_percpu), 2001 2026 KUNIT_CASE(match_all_not_assigned), 2002 2027 KUNIT_CASE(match_all_ptr_tag), 2003 2028 KUNIT_CASE(match_all_mem_tag),
+3 -2
mm/migrate.c
··· 206 206 pte_t newpte; 207 207 void *addr; 208 208 209 - VM_BUG_ON_PAGE(PageCompound(page), page); 209 + if (PageCompound(page)) 210 + return false; 210 211 VM_BUG_ON_PAGE(!PageAnon(page), page); 211 212 VM_BUG_ON_PAGE(!PageLocked(page), page); 212 213 VM_BUG_ON_PAGE(pte_present(*pvmw->pte), page); ··· 1178 1177 * not accounted to NR_ISOLATED_*. They can be recognized 1179 1178 * as __folio_test_movable 1180 1179 */ 1181 - if (likely(!__folio_test_movable(src))) 1180 + if (likely(!__folio_test_movable(src)) && reason != MR_DEMOTION) 1182 1181 mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON + 1183 1182 folio_is_file_lru(src), -folio_nr_pages(src)); 1184 1183
+2 -1
mm/mmap.c
··· 900 900 901 901 if (get_area) { 902 902 addr = get_area(file, addr, len, pgoff, flags); 903 - } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { 903 + } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) 904 + && IS_ALIGNED(len, PMD_SIZE)) { 904 905 /* Ensures that larger anonymous mappings are THP aligned. */ 905 906 addr = thp_get_unmapped_area_vmflags(file, addr, len, 906 907 pgoff, flags, vm_flags);
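The mmap change only asks for THP-aligned placement when the requested length is itself a multiple of PMD_SIZE; for other lengths the forced alignment mainly leaves gaps between mappings with little huge-page benefit. The added condition is the kernel's IS_ALIGNED() test, equivalent to the small sketch below (a 2 MiB PMD size is assumed for illustration):

#include <stdbool.h>
#include <stdio.h>

#define ASSUMED_PMD_SIZE (2UL * 1024 * 1024)	/* typical x86-64 PMD size */

/* same predicate as IS_ALIGNED(len, PMD_SIZE) */
static bool worth_thp_aligning(unsigned long len)
{
	return (len & (ASSUMED_PMD_SIZE - 1)) == 0;
}

int main(void)
{
	printf("4 MiB mapping: %s\n", worth_thp_aligning(4UL << 20) ? "THP-align" : "default");
	printf("3 MiB mapping: %s\n", worth_thp_aligning(3UL << 20) ? "THP-align" : "default");
	return 0;
}
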
+2 -2
mm/page_io.c
··· 570 570 * attempt to access it in the page fault retry time check. 571 571 */ 572 572 get_task_struct(current); 573 - count_vm_event(PSWPIN); 573 + count_vm_events(PSWPIN, folio_nr_pages(folio)); 574 574 submit_bio_wait(&bio); 575 575 __end_swap_bio_read(&bio); 576 576 put_task_struct(current); ··· 585 585 bio->bi_iter.bi_sector = swap_folio_sector(folio); 586 586 bio->bi_end_io = end_swap_bio_read; 587 587 bio_add_folio_nofail(bio, folio, folio_size(folio), 0); 588 - count_vm_event(PSWPIN); 588 + count_vm_events(PSWPIN, folio_nr_pages(folio)); 589 589 submit_bio(bio); 590 590 } 591 591
+3 -6
mm/rmap.c
··· 885 885 return false; 886 886 } 887 887 888 - if (pvmw.pte) { 889 - if (lru_gen_enabled() && 890 - pte_young(ptep_get(pvmw.pte))) { 891 - lru_gen_look_around(&pvmw); 888 + if (lru_gen_enabled() && pvmw.pte) { 889 + if (lru_gen_look_around(&pvmw)) 892 890 referenced++; 893 - } 894 - 891 + } else if (pvmw.pte) { 895 892 if (ptep_clear_flush_young_notify(vma, address, 896 893 pvmw.pte)) 897 894 referenced++;
+5 -3
mm/shrinker.c
··· 76 76 77 77 int alloc_shrinker_info(struct mem_cgroup *memcg) 78 78 { 79 - struct shrinker_info *info; 80 79 int nid, ret = 0; 81 80 int array_size = 0; 82 81 83 82 mutex_lock(&shrinker_mutex); 84 83 array_size = shrinker_unit_size(shrinker_nr_max); 85 84 for_each_node(nid) { 86 - info = kvzalloc_node(sizeof(*info) + array_size, GFP_KERNEL, nid); 85 + struct shrinker_info *info = kvzalloc_node(sizeof(*info) + array_size, 86 + GFP_KERNEL, nid); 87 87 if (!info) 88 88 goto err; 89 89 info->map_nr_max = shrinker_nr_max; 90 - if (shrinker_unit_alloc(info, NULL, nid)) 90 + if (shrinker_unit_alloc(info, NULL, nid)) { 91 + kvfree(info); 91 92 goto err; 93 + } 92 94 rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); 93 95 } 94 96 mutex_unlock(&shrinker_mutex);
+30 -19
mm/swapfile.c
··· 731 731 return offset; 732 732 } 733 733 734 - static void swap_reclaim_full_clusters(struct swap_info_struct *si) 734 + /* Return true if reclaimed a whole cluster */ 735 + static void swap_reclaim_full_clusters(struct swap_info_struct *si, bool force) 735 736 { 736 737 long to_scan = 1; 737 738 unsigned long offset, end; 738 739 struct swap_cluster_info *ci; 739 740 unsigned char *map = si->swap_map; 740 - int nr_reclaim, total_reclaimed = 0; 741 + int nr_reclaim; 741 742 742 - if (atomic_long_read(&nr_swap_pages) <= SWAPFILE_CLUSTER) 743 + if (force) 743 744 to_scan = si->inuse_pages / SWAPFILE_CLUSTER; 744 745 745 746 while (!list_empty(&si->full_clusters)) { ··· 750 749 end = min(si->max, offset + SWAPFILE_CLUSTER); 751 750 to_scan--; 752 751 752 + spin_unlock(&si->lock); 753 753 while (offset < end) { 754 754 if (READ_ONCE(map[offset]) == SWAP_HAS_CACHE) { 755 - spin_unlock(&si->lock); 756 755 nr_reclaim = __try_to_reclaim_swap(si, offset, 757 756 TTRS_ANYWAY | TTRS_DIRECT); 758 - spin_lock(&si->lock); 759 - if (nr_reclaim > 0) { 760 - offset += nr_reclaim; 761 - total_reclaimed += nr_reclaim; 762 - continue; 763 - } else if (nr_reclaim < 0) { 764 - offset += -nr_reclaim; 757 + if (nr_reclaim) { 758 + offset += abs(nr_reclaim); 765 759 continue; 766 760 } 767 761 } 768 762 offset++; 769 763 } 770 - if (to_scan <= 0 || total_reclaimed) 764 + spin_lock(&si->lock); 765 + 766 + if (to_scan <= 0) 771 767 break; 772 768 } 769 + } 770 + 771 + static void swap_reclaim_work(struct work_struct *work) 772 + { 773 + struct swap_info_struct *si; 774 + 775 + si = container_of(work, struct swap_info_struct, reclaim_work); 776 + 777 + spin_lock(&si->lock); 778 + swap_reclaim_full_clusters(si, true); 779 + spin_unlock(&si->lock); 773 780 } 774 781 775 782 /* ··· 808 799 VM_BUG_ON(!found); 809 800 goto done; 810 801 } 802 + 803 + /* Try reclaim from full clusters if free clusters list is drained */ 804 + if (vm_swap_full()) 805 + swap_reclaim_full_clusters(si, false); 811 806 812 807 if (order < PMD_ORDER) { 813 808 unsigned int frags = 0; ··· 894 881 } 895 882 896 883 done: 897 - /* Try reclaim from full clusters if device is nearfull */ 898 - if (vm_swap_full() && (!found || (si->pages - si->inuse_pages) < SWAPFILE_CLUSTER)) { 899 - swap_reclaim_full_clusters(si); 900 - if (!found && !order && si->pages != si->inuse_pages) 901 - goto new_cluster; 902 - } 903 - 904 884 cluster->next[order] = offset; 905 885 return found; 906 886 } ··· 928 922 si->lowest_bit = si->max; 929 923 si->highest_bit = 0; 930 924 del_from_avail_list(si); 925 + 926 + if (vm_swap_full()) 927 + schedule_work(&si->reclaim_work); 931 928 } 932 929 } 933 930 ··· 2825 2816 wait_for_completion(&p->comp); 2826 2817 2827 2818 flush_work(&p->discard_work); 2819 + flush_work(&p->reclaim_work); 2828 2820 2829 2821 destroy_swap_extents(p); 2830 2822 if (p->flags & SWP_CONTINUED) ··· 3386 3376 return PTR_ERR(si); 3387 3377 3388 3378 INIT_WORK(&si->discard_work, swap_discard_work); 3379 + INIT_WORK(&si->reclaim_work, swap_reclaim_work); 3389 3380 3390 3381 name = getname(specialfile); 3391 3382 if (IS_ERR(name)) {
+54 -48
mm/vmscan.c
··· 56 56 #include <linux/khugepaged.h> 57 57 #include <linux/rculist_nulls.h> 58 58 #include <linux/random.h> 59 + #include <linux/mmu_notifier.h> 59 60 60 61 #include <asm/tlbflush.h> 61 62 #include <asm/div64.h> ··· 3295 3294 return false; 3296 3295 } 3297 3296 3298 - static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr) 3297 + static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr, 3298 + struct pglist_data *pgdat) 3299 3299 { 3300 3300 unsigned long pfn = pte_pfn(pte); 3301 3301 ··· 3308 3306 if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte))) 3309 3307 return -1; 3310 3308 3309 + if (!pte_young(pte) && !mm_has_notifiers(vma->vm_mm)) 3310 + return -1; 3311 + 3311 3312 if (WARN_ON_ONCE(!pfn_valid(pfn))) 3313 + return -1; 3314 + 3315 + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) 3312 3316 return -1; 3313 3317 3314 3318 return pfn; 3315 3319 } 3316 3320 3317 - static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr) 3321 + static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr, 3322 + struct pglist_data *pgdat) 3318 3323 { 3319 3324 unsigned long pfn = pmd_pfn(pmd); 3320 3325 ··· 3333 3324 if (WARN_ON_ONCE(pmd_devmap(pmd))) 3334 3325 return -1; 3335 3326 3327 + if (!pmd_young(pmd) && !mm_has_notifiers(vma->vm_mm)) 3328 + return -1; 3329 + 3336 3330 if (WARN_ON_ONCE(!pfn_valid(pfn))) 3331 + return -1; 3332 + 3333 + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) 3337 3334 return -1; 3338 3335 3339 3336 return pfn; ··· 3349 3334 struct pglist_data *pgdat, bool can_swap) 3350 3335 { 3351 3336 struct folio *folio; 3352 - 3353 - /* try to avoid unnecessary memory loads */ 3354 - if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) 3355 - return NULL; 3356 3337 3357 3338 folio = pfn_folio(pfn); 3358 3339 if (folio_nid(folio) != pgdat->node_id) ··· 3405 3394 total++; 3406 3395 walk->mm_stats[MM_LEAF_TOTAL]++; 3407 3396 3408 - pfn = get_pte_pfn(ptent, args->vma, addr); 3397 + pfn = get_pte_pfn(ptent, args->vma, addr, pgdat); 3409 3398 if (pfn == -1) 3410 3399 continue; 3411 - 3412 - if (!pte_young(ptent)) { 3413 - walk->mm_stats[MM_LEAF_OLD]++; 3414 - continue; 3415 - } 3416 3400 3417 3401 folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap); 3418 3402 if (!folio) 3419 3403 continue; 3420 3404 3421 - if (!ptep_test_and_clear_young(args->vma, addr, pte + i)) 3422 - VM_WARN_ON_ONCE(true); 3405 + if (!ptep_clear_young_notify(args->vma, addr, pte + i)) 3406 + continue; 3423 3407 3424 3408 young++; 3425 3409 walk->mm_stats[MM_LEAF_YOUNG]++; ··· 3480 3474 /* don't round down the first address */ 3481 3475 addr = i ?
(*first & PMD_MASK) + i * PMD_SIZE : *first; 3482 3476 3483 - pfn = get_pmd_pfn(pmd[i], vma, addr); 3484 - if (pfn == -1) 3477 + if (!pmd_present(pmd[i])) 3485 3478 goto next; 3486 3479 3487 3480 if (!pmd_trans_huge(pmd[i])) { 3488 - if (!walk->force_scan && should_clear_pmd_young()) 3481 + if (!walk->force_scan && should_clear_pmd_young() && 3482 + !mm_has_notifiers(args->mm)) 3489 3483 pmdp_test_and_clear_young(vma, addr, pmd + i); 3490 3484 goto next; 3491 3485 } 3486 + 3487 + pfn = get_pmd_pfn(pmd[i], vma, addr, pgdat); 3488 + if (pfn == -1) 3489 + goto next; 3492 3490 3493 3491 folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap); 3494 3492 if (!folio) 3495 3493 goto next; 3496 3494 3497 - if (!pmdp_test_and_clear_young(vma, addr, pmd + i)) 3495 + if (!pmdp_clear_young_notify(vma, addr, pmd + i)) 3498 3496 goto next; 3499 3497 3500 3498 walk->mm_stats[MM_LEAF_YOUNG]++; ··· 3556 3546 } 3557 3547 3558 3548 if (pmd_trans_huge(val)) { 3559 - unsigned long pfn = pmd_pfn(val); 3560 3549 struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); 3550 + unsigned long pfn = get_pmd_pfn(val, vma, addr, pgdat); 3561 3551 3562 3552 walk->mm_stats[MM_LEAF_TOTAL]++; 3563 3553 3564 - if (!pmd_young(val)) { 3565 - walk->mm_stats[MM_LEAF_OLD]++; 3566 - continue; 3567 - } 3568 - 3569 - /* try to avoid unnecessary memory loads */ 3570 - if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) 3571 - continue; 3572 - 3573 - walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first); 3554 + if (pfn != -1) 3555 + walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first); 3574 3556 continue; 3575 3557 } 3576 3558 3577 - walk->mm_stats[MM_NONLEAF_TOTAL]++; 3578 - 3579 - if (!walk->force_scan && should_clear_pmd_young()) { 3559 + if (!walk->force_scan && should_clear_pmd_young() && 3560 + !mm_has_notifiers(args->mm)) { 3580 3561 if (!pmd_young(val)) 3581 3562 continue; 3582 3563 ··· 4041 4040 * the PTE table to the Bloom filter. This forms a feedback loop between the 4042 4041 * eviction and the aging. 4043 4042 */ 4044 - void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) 4043 + bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw) 4045 4044 { 4046 4045 int i; 4047 4046 unsigned long start; 4048 4047 unsigned long end; 4049 4048 struct lru_gen_mm_walk *walk; 4050 - int young = 0; 4049 + int young = 1; 4051 4050 pte_t *pte = pvmw->pte; 4052 4051 unsigned long addr = pvmw->address; 4053 4052 struct vm_area_struct *vma = pvmw->vma; ··· 4063 4062 lockdep_assert_held(pvmw->ptl); 4064 4063 VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio); 4065 4064 4065 + if (!ptep_clear_young_notify(vma, addr, pte)) 4066 + return false; 4067 + 4066 4068 if (spin_is_contended(pvmw->ptl)) 4067 - return; 4069 + return true; 4068 4070 4069 4071 /* exclude special VMAs containing anon pages from COW */ 4070 4072 if (vma->vm_flags & VM_SPECIAL) 4071 - return; 4073 + return true; 4072 4074 4073 4075 /* avoid taking the LRU lock under the PTL when possible */ 4074 4076 walk = current->reclaim_state ?
current->reclaim_state->mm_walk : NULL; 4075 4077 4076 4078 start = max(addr & PMD_MASK, vma->vm_start); 4077 4079 end = min(addr | ~PMD_MASK, vma->vm_end - 1) + 1; 4080 + 4081 + if (end - start == PAGE_SIZE) 4082 + return true; 4078 4083 4079 4084 if (end - start > MIN_LRU_BATCH * PAGE_SIZE) { 4080 4085 if (addr - start < MIN_LRU_BATCH * PAGE_SIZE / 2) ··· 4095 4088 4096 4089 /* folio_update_gen() requires stable folio_memcg() */ 4097 4090 if (!mem_cgroup_trylock_pages(memcg)) 4098 - return; 4091 + return true; 4099 4092 4100 4093 arch_enter_lazy_mmu_mode(); 4101 4094 ··· 4105 4098 unsigned long pfn; 4106 4099 pte_t ptent = ptep_get(pte + i); 4107 4100 4108 - pfn = get_pte_pfn(ptent, vma, addr); 4101 + pfn = get_pte_pfn(ptent, vma, addr, pgdat); 4109 4102 if (pfn == -1) 4110 - continue; 4111 - 4112 - if (!pte_young(ptent)) 4113 4103 continue; 4114 4104 4115 4105 folio = get_pfn_folio(pfn, memcg, pgdat, can_swap); 4116 4106 if (!folio) 4117 4107 continue; 4118 4108 4119 - if (!ptep_test_and_clear_young(vma, addr, pte + i)) 4120 - VM_WARN_ON_ONCE(true); 4109 + if (!ptep_clear_young_notify(vma, addr, pte + i)) 4110 + continue; 4121 4111 4122 4112 young++; 4123 4113 ··· 4144 4140 /* feedback from rmap walkers to page table walkers */ 4145 4141 if (mm_state && suitable_to_scan(i, young)) 4146 4142 update_bloom_filter(mm_state, max_seq, pvmw->pmd); 4143 + 4144 + return true; 4147 4145 } 4148 4146 4149 4147 /****************************************************************************** ··· 5260 5254 for (tier = 0; tier < MAX_NR_TIERS; tier++) { 5261 5255 seq_printf(m, " %10d", tier); 5262 5256 for (type = 0; type < ANON_AND_FILE; type++) { 5263 - const char *s = " "; 5257 + const char *s = "xxx"; 5264 5258 unsigned long n[3] = {}; 5265 5259 5266 5260 if (seq == max_seq) { 5267 - s = "RT "; 5261 + s = "RTx"; 5268 5262 n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]); 5269 5263 n[1] = READ_ONCE(lrugen->avg_total[type][tier]); 5270 5264 } else if (seq == min_seq[type] || NR_HIST_GENS > 1) { ··· 5286 5280 5287 5281 seq_puts(m, " "); 5288 5282 for (i = 0; i < NR_MM_STATS; i++) { 5289 - const char *s = " "; 5283 + const char *s = "xxxx"; 5290 5284 unsigned long n = 0; 5291 5285 5292 5286 if (seq == max_seq && NR_HIST_GENS == 1) { 5293 - s = "LOYNFA"; 5287 + s = "TYFA"; 5294 5288 n = READ_ONCE(mm_state->stats[hist][i]); 5295 5289 } else if (seq != max_seq && NR_HIST_GENS > 1) { 5296 - s = "loynfa"; 5290 + s = "tyfa"; 5297 5291 n = READ_ONCE(mm_state->stats[hist][i]); 5298 5292 } 5299 5293
+5 -4
tools/mm/page-types.c
··· 22 22 #include <time.h> 23 23 #include <setjmp.h> 24 24 #include <signal.h> 25 + #include <inttypes.h> 25 26 #include <sys/types.h> 26 27 #include <sys/errno.h> 27 28 #include <sys/fcntl.h> ··· 392 391 if (opt_file) 393 392 printf("%lx\t", voff); 394 393 if (opt_list_cgroup) 395 - printf("@%llu\t", (unsigned long long)cgroup0); 394 + printf("@%" PRIu64 "\t", cgroup0); 396 395 if (opt_list_mapcnt) 397 - printf("%lu\t", mapcnt0); 396 + printf("%" PRIu64 "\t", mapcnt0); 398 397 printf("%lx\t%lx\t%s\n", 399 398 index, count, page_flag_name(flags0)); 400 399 } ··· 420 419 if (opt_file) 421 420 printf("%lx\t", voffset); 422 421 if (opt_list_cgroup) 423 - printf("@%llu\t", (unsigned long long)cgroup); 422 + printf("@%" PRIu64 "\t", cgroup); 424 423 if (opt_list_mapcnt) 425 - printf("%lu\t", mapcnt); 424 + printf("%" PRIu64 "\t", mapcnt); 426 425 427 426 printf("%lx\t%s\n", offset, page_flag_name(flags)); 428 427 }
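The page-types.c hunks switch the cgroup and map-count fields to the PRIu64 macro from <inttypes.h>, which expands to the correct printf conversion for uint64_t on every platform, so -Wformat (and therefore -Werror) stays quiet. A standalone illustration:

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t cgroup = 42;	/* example value only */

	/* "%lu" only matches uint64_t where it happens to be unsigned long;
	 * PRIu64 always expands to the right conversion specifier. */
	printf("@%" PRIu64 "\n", cgroup);
	return 0;
}
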
+3 -1
tools/mm/slabinfo.c
··· 1297 1297 slab->cpu_partial_free = get_obj("cpu_partial_free"); 1298 1298 slab->alloc_node_mismatch = get_obj("alloc_node_mismatch"); 1299 1299 slab->deactivate_bypass = get_obj("deactivate_bypass"); 1300 - chdir(".."); 1300 + if (chdir("..")) 1301 + fatal("Unable to chdir from slab ../%s\n", 1302 + slab->name); 1301 1303 if (slab->name[0] == ':') 1302 1304 alias_targets++; 1303 1305 slab++;