Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'mm-hotfixes-stable' into mm-stable in order to pick up changes required by mm-stable material: hugetlb and damon.

+239 -119
+3
MAINTAINERS
··· 16127 16127 M: Mike Rapoport <rppt@kernel.org> 16128 16128 L: linux-mm@kvack.org 16129 16129 S: Maintained 16130 + T: git git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock.git 16130 16131 F: include/linux/numa_memblks.h 16131 16132 F: mm/numa.c 16132 16133 F: mm/numa_emulation.c ··· 16195 16194 R: Liam R. Howlett <Liam.Howlett@oracle.com> 16196 16195 R: Vlastimil Babka <vbabka@suse.cz> 16197 16196 R: Harry Yoo <harry.yoo@oracle.com> 16197 + R: Jann Horn <jannh@google.com> 16198 16198 L: linux-mm@kvack.org 16199 16199 S: Maintained 16200 16200 F: include/linux/rmap.h ··· 16241 16239 R: Ryan Roberts <ryan.roberts@arm.com> 16242 16240 R: Dev Jain <dev.jain@arm.com> 16243 16241 R: Barry Song <baohua@kernel.org> 16242 + R: Lance Yang <lance.yang@linux.dev> 16244 16243 L: linux-mm@kvack.org 16245 16244 S: Maintained 16246 16245 W: http://www.linux-mm.org
+1 -1
arch/arm64/kernel/machine_kexec_file.c
··· 94 94 char *initrd, unsigned long initrd_len, 95 95 char *cmdline) 96 96 { 97 - struct kexec_buf kbuf; 97 + struct kexec_buf kbuf = {}; 98 98 void *dtb = NULL; 99 99 unsigned long initrd_load_addr = 0, dtb_len, 100 100 orig_segments = image->nr_segments;
+2 -2
arch/riscv/kernel/kexec_elf.c
··· 28 28 int i; 29 29 int ret = 0; 30 30 size_t size; 31 - struct kexec_buf kbuf; 31 + struct kexec_buf kbuf = {}; 32 32 const struct elf_phdr *phdr; 33 33 34 34 kbuf.image = image; ··· 66 66 { 67 67 int i; 68 68 int ret; 69 - struct kexec_buf kbuf; 69 + struct kexec_buf kbuf = {}; 70 70 const struct elf_phdr *phdr; 71 71 unsigned long lowest_paddr = ULONG_MAX; 72 72 unsigned long lowest_vaddr = ULONG_MAX;
+1 -1
arch/riscv/kernel/kexec_image.c
··· 41 41 struct riscv_image_header *h; 42 42 u64 flags; 43 43 bool be_image, be_kernel; 44 - struct kexec_buf kbuf; 44 + struct kexec_buf kbuf = {}; 45 45 int ret; 46 46 47 47 /* Check Image header */
+1 -1
arch/riscv/kernel/machine_kexec_file.c
··· 261 261 int ret; 262 262 void *fdt; 263 263 unsigned long initrd_pbase = 0UL; 264 - struct kexec_buf kbuf; 264 + struct kexec_buf kbuf = {}; 265 265 char *modified_cmdline = NULL; 266 266 267 267 kbuf.image = image;
+1 -1
arch/s390/kernel/kexec_elf.c
··· 16 16 static int kexec_file_add_kernel_elf(struct kimage *image, 17 17 struct s390_load_data *data) 18 18 { 19 - struct kexec_buf buf; 19 + struct kexec_buf buf = {}; 20 20 const Elf_Ehdr *ehdr; 21 21 const Elf_Phdr *phdr; 22 22 Elf_Addr entry;
+1 -1
arch/s390/kernel/kexec_image.c
··· 16 16 static int kexec_file_add_kernel_image(struct kimage *image, 17 17 struct s390_load_data *data) 18 18 { 19 - struct kexec_buf buf; 19 + struct kexec_buf buf = {}; 20 20 21 21 buf.image = image; 22 22
+3 -3
arch/s390/kernel/machine_kexec_file.c
··· 129 129 static int kexec_file_add_purgatory(struct kimage *image, 130 130 struct s390_load_data *data) 131 131 { 132 - struct kexec_buf buf; 132 + struct kexec_buf buf = {}; 133 133 int ret; 134 134 135 135 buf.image = image; ··· 152 152 static int kexec_file_add_initrd(struct kimage *image, 153 153 struct s390_load_data *data) 154 154 { 155 - struct kexec_buf buf; 155 + struct kexec_buf buf = {}; 156 156 int ret; 157 157 158 158 buf.image = image; ··· 184 184 { 185 185 __u32 *lc_ipl_parmblock_ptr; 186 186 unsigned int len, ncerts; 187 - struct kexec_buf buf; 187 + struct kexec_buf buf = {}; 188 188 unsigned long addr; 189 189 void *ptr, *end; 190 190 int ret;
+3 -5
drivers/block/zram/zram_drv.c
··· 1788 1788 u32 index) 1789 1789 { 1790 1790 zram_slot_lock(zram, index); 1791 + zram_free_page(zram, index); 1791 1792 zram_set_flag(zram, index, ZRAM_SAME); 1792 1793 zram_set_handle(zram, index, fill); 1793 1794 zram_slot_unlock(zram, index); ··· 1826 1825 kunmap_local(src); 1827 1826 1828 1827 zram_slot_lock(zram, index); 1828 + zram_free_page(zram, index); 1829 1829 zram_set_flag(zram, index, ZRAM_HUGE); 1830 1830 zram_set_handle(zram, index, handle); 1831 1831 zram_set_obj_size(zram, index, PAGE_SIZE); ··· 1849 1847 struct zcomp_strm *zstrm; 1850 1848 unsigned long element; 1851 1849 bool same_filled; 1852 - 1853 - /* First, free memory allocated to this slot (if any) */ 1854 - zram_slot_lock(zram, index); 1855 - zram_free_page(zram, index); 1856 - zram_slot_unlock(zram, index); 1857 1850 1858 1851 mem = kmap_local_page(page); 1859 1852 same_filled = page_same_filled(mem, &element); ··· 1891 1894 zcomp_stream_put(zstrm); 1892 1895 1893 1896 zram_slot_lock(zram, index); 1897 + zram_free_page(zram, index); 1894 1898 zram_set_handle(zram, index, handle); 1895 1899 zram_set_obj_size(zram, index, comp_len); 1896 1900 zram_slot_unlock(zram, index);
+2 -2
fs/nilfs2/sysfs.c
··· 1075 1075 ************************************************************************/ 1076 1076 1077 1077 static ssize_t nilfs_feature_revision_show(struct kobject *kobj, 1078 - struct attribute *attr, char *buf) 1078 + struct kobj_attribute *attr, char *buf) 1079 1079 { 1080 1080 return sysfs_emit(buf, "%d.%d\n", 1081 1081 NILFS_CURRENT_REV, NILFS_MINOR_REV); ··· 1087 1087 "(1) revision\n\tshow current revision of NILFS file system driver.\n"; 1088 1088 1089 1089 static ssize_t nilfs_feature_README_show(struct kobject *kobj, 1090 - struct attribute *attr, 1090 + struct kobj_attribute *attr, 1091 1091 char *buf) 1092 1092 { 1093 1093 return sysfs_emit(buf, features_readme_str);
+4 -4
fs/nilfs2/sysfs.h
··· 50 50 struct completion sg_segments_kobj_unregister; 51 51 }; 52 52 53 - #define NILFS_COMMON_ATTR_STRUCT(name) \ 53 + #define NILFS_KOBJ_ATTR_STRUCT(name) \ 54 54 struct nilfs_##name##_attr { \ 55 55 struct attribute attr; \ 56 - ssize_t (*show)(struct kobject *, struct attribute *, \ 56 + ssize_t (*show)(struct kobject *, struct kobj_attribute *, \ 57 57 char *); \ 58 - ssize_t (*store)(struct kobject *, struct attribute *, \ 58 + ssize_t (*store)(struct kobject *, struct kobj_attribute *, \ 59 59 const char *, size_t); \ 60 60 } 61 61 62 - NILFS_COMMON_ATTR_STRUCT(feature); 62 + NILFS_KOBJ_ATTR_STRUCT(feature); 63 63 64 64 #define NILFS_DEV_ATTR_STRUCT(name) \ 65 65 struct nilfs_##name##_attr { \
+9 -1
fs/ocfs2/extent_map.c
··· 706 706 * it not only handles the fiemap for inlined files, but also deals 707 707 * with the fast symlink, cause they have no difference for extent 708 708 * mapping per se. 709 + * 710 + * Must be called with ip_alloc_sem semaphore held. 709 711 */ 710 712 static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, 711 713 struct fiemap_extent_info *fieinfo, ··· 719 717 u64 phys; 720 718 u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; 721 719 struct ocfs2_inode_info *oi = OCFS2_I(inode); 720 + lockdep_assert_held_read(&oi->ip_alloc_sem); 722 721 723 722 di = (struct ocfs2_dinode *)di_bh->b_data; 724 723 if (ocfs2_inode_is_fast_symlink(inode)) ··· 735 732 phys += offsetof(struct ocfs2_dinode, 736 733 id2.i_data.id_data); 737 734 735 + /* Release the ip_alloc_sem to prevent deadlock on page fault */ 736 + up_read(&OCFS2_I(inode)->ip_alloc_sem); 738 737 ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, 739 738 flags); 739 + down_read(&OCFS2_I(inode)->ip_alloc_sem); 740 740 if (ret < 0) 741 741 return ret; 742 742 } ··· 808 802 len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; 809 803 phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; 810 804 virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; 811 - 805 + /* Release the ip_alloc_sem to prevent deadlock on page fault */ 806 + up_read(&OCFS2_I(inode)->ip_alloc_sem); 812 807 ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, 813 808 len_bytes, fe_flags); 809 + down_read(&OCFS2_I(inode)->ip_alloc_sem); 814 810 if (ret) 815 811 break; 816 812
+2 -1
fs/proc/generic.c
··· 393 393 if (proc_alloc_inum(&dp->low_ino)) 394 394 goto out_free_entry; 395 395 396 - pde_set_flags(dp); 396 + if (!S_ISDIR(dp->mode)) 397 + pde_set_flags(dp); 397 398 398 399 write_lock(&proc_subdir_lock); 399 400 dp->parent = dir;
+25 -6
include/linux/compiler-clang.h
··· 18 18 #define KASAN_ABI_VERSION 5 19 19 20 20 /* 21 + * Clang 22 added preprocessor macros to match GCC, in hopes of eventually 22 + * dropping __has_feature support for sanitizers: 23 + * https://github.com/llvm/llvm-project/commit/568c23bbd3303518c5056d7f03444dae4fdc8a9c 24 + * Create these macros for older versions of clang so that it is easy to clean 25 + * up once the minimum supported version of LLVM for building the kernel always 26 + * creates these macros. 27 + * 21 28 * Note: Checking __has_feature(*_sanitizer) is only true if the feature is 22 29 * enabled. Therefore it is not required to additionally check defined(CONFIG_*) 23 30 * to avoid adding redundant attributes in other configurations. 24 31 */ 25 - 26 - #if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer) 27 - /* Emulate GCC's __SANITIZE_ADDRESS__ flag */ 32 + #if __has_feature(address_sanitizer) && !defined(__SANITIZE_ADDRESS__) 28 33 #define __SANITIZE_ADDRESS__ 34 + #endif 35 + #if __has_feature(hwaddress_sanitizer) && !defined(__SANITIZE_HWADDRESS__) 36 + #define __SANITIZE_HWADDRESS__ 37 + #endif 38 + #if __has_feature(thread_sanitizer) && !defined(__SANITIZE_THREAD__) 39 + #define __SANITIZE_THREAD__ 40 + #endif 41 + 42 + /* 43 + * Treat __SANITIZE_HWADDRESS__ the same as __SANITIZE_ADDRESS__ in the kernel. 44 + */ 45 + #ifdef __SANITIZE_HWADDRESS__ 46 + #define __SANITIZE_ADDRESS__ 47 + #endif 48 + 49 + #ifdef __SANITIZE_ADDRESS__ 29 50 #define __no_sanitize_address \ 30 51 __attribute__((no_sanitize("address", "hwaddress"))) 31 52 #else 32 53 #define __no_sanitize_address 33 54 #endif 34 55 35 - #if __has_feature(thread_sanitizer) 36 - /* emulate gcc's __SANITIZE_THREAD__ flag */ 37 - #define __SANITIZE_THREAD__ 56 + #ifdef __SANITIZE_THREAD__ 38 57 #define __no_sanitize_thread \ 39 58 __attribute__((no_sanitize("thread"))) 40 59 #else
+2
include/linux/damon.h
··· 636 636 * @data: Data that will be passed to @fn. 637 637 * @repeat: Repeat invocations. 638 638 * @return_code: Return code from @fn invocation. 639 + * @dealloc_on_cancel: De-allocate when canceled. 639 640 * 640 641 * Control damon_call(), which requests specific kdamond to invoke a given 641 642 * function. Refer to damon_call() for more details. ··· 646 645 void *data; 647 646 bool repeat; 648 647 int return_code; 648 + bool dealloc_on_cancel; 649 649 /* private: internal use only */ 650 650 /* informs if the kdamond finished handling of the request */ 651 651 struct completion completion;
+3 -3
include/linux/kasan.h
··· 562 562 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) 563 563 564 564 void kasan_populate_early_vm_area_shadow(void *start, unsigned long size); 565 - int kasan_populate_vmalloc(unsigned long addr, unsigned long size); 565 + int kasan_populate_vmalloc(unsigned long addr, unsigned long size, gfp_t gfp_mask); 566 566 void kasan_release_vmalloc(unsigned long start, unsigned long end, 567 567 unsigned long free_region_start, 568 568 unsigned long free_region_end, ··· 574 574 unsigned long size) 575 575 { } 576 576 static inline int kasan_populate_vmalloc(unsigned long start, 577 - unsigned long size) 577 + unsigned long size, gfp_t gfp_mask) 578 578 { 579 579 return 0; 580 580 } ··· 610 610 static inline void kasan_populate_early_vm_area_shadow(void *start, 611 611 unsigned long size) { } 612 612 static inline int kasan_populate_vmalloc(unsigned long start, 613 - unsigned long size) 613 + unsigned long size, gfp_t gfp_mask) 614 614 { 615 615 return 0; 616 616 }
+10
include/linux/swap.h
··· 381 381 void mark_page_accessed(struct page *); 382 382 void folio_mark_accessed(struct folio *); 383 383 384 + static inline bool folio_may_be_lru_cached(struct folio *folio) 385 + { 386 + /* 387 + * Holding PMD-sized folios in per-CPU LRU cache unbalances accounting. 388 + * Holding small numbers of low-order mTHP folios in per-CPU LRU cache 389 + * will be sensible, but nobody has implemented and tested that yet. 390 + */ 391 + return !folio_test_large(folio); 392 + } 393 + 384 394 extern atomic_t lru_disable_count; 385 395 386 396 static inline bool lru_cache_disabled(void)
+1 -1
init/main.c
··· 956 956 sort_main_extable(); 957 957 trap_init(); 958 958 mm_core_init(); 959 + maple_tree_init(); 959 960 poking_init(); 960 961 ftrace_init(); 961 962 ··· 974 973 "Interrupts were enabled *very* early, fixing it\n")) 975 974 local_irq_disable(); 976 975 radix_tree_init(); 977 - maple_tree_init(); 978 976 979 977 /* 980 978 * Set up housekeeping before setting up workqueues to allow the unbound
+10 -2
mm/damon/core.c
··· 2141 2141 if (!quota->ms && !quota->sz && list_empty(&quota->goals)) 2142 2142 return; 2143 2143 2144 + /* First charge window */ 2145 + if (!quota->total_charged_sz && !quota->charged_from) 2146 + quota->charged_from = jiffies; 2147 + 2144 2148 /* New charge window starts */ 2145 2149 if (time_after_eq(jiffies, quota->charged_from + 2146 2150 msecs_to_jiffies(quota->reset_interval))) { ··· 2510 2506 mutex_lock(&ctx->call_controls_lock); 2511 2507 list_del(&control->list); 2512 2508 mutex_unlock(&ctx->call_controls_lock); 2513 - if (!control->repeat) 2509 + if (!control->repeat) { 2514 2510 complete(&control->completion); 2515 - else 2511 + } else if (control->canceled && control->dealloc_on_cancel) { 2512 + kfree(control); 2513 + continue; 2514 + } else { 2516 2515 list_add(&control->list, &repeat_controls); 2516 + } 2517 2517 } 2518 2518 control = list_first_entry_or_null(&repeat_controls, 2519 2519 struct damon_call_control, list);
+5
mm/damon/lru_sort.c
··· 198 198 if (err) 199 199 return err; 200 200 201 + if (!damon_lru_sort_mon_attrs.sample_interval) { 202 + err = -EINVAL; 203 + goto out; 204 + } 205 + 201 206 err = damon_set_attrs(ctx, &damon_lru_sort_mon_attrs); 202 207 if (err) 203 208 goto out;
+5
mm/damon/reclaim.c
··· 194 194 if (err) 195 195 return err; 196 196 197 + if (!damon_reclaim_mon_attrs.aggr_interval) { 198 + err = -EINVAL; 199 + goto out; 200 + } 201 + 197 202 err = damon_set_attrs(param_ctx, &damon_reclaim_mon_attrs); 198 203 if (err) 199 204 goto out;
+24 -13
mm/damon/sysfs.c
··· 1292 1292 { 1293 1293 struct damon_sysfs_kdamond *kdamond = container_of(kobj, 1294 1294 struct damon_sysfs_kdamond, kobj); 1295 - struct damon_ctx *ctx = kdamond->damon_ctx; 1296 - bool running; 1295 + struct damon_ctx *ctx; 1296 + bool running = false; 1297 1297 1298 - if (!ctx) 1299 - running = false; 1300 - else 1298 + if (!mutex_trylock(&damon_sysfs_lock)) 1299 + return -EBUSY; 1300 + 1301 + ctx = kdamond->damon_ctx; 1302 + if (ctx) 1301 1303 running = damon_is_running(ctx); 1304 + 1305 + mutex_unlock(&damon_sysfs_lock); 1302 1306 1303 1307 return sysfs_emit(buf, "%s\n", running ? 1304 1308 damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_ON] : ··· 1569 1565 return 0; 1570 1566 } 1571 1567 1572 - static struct damon_call_control damon_sysfs_repeat_call_control = { 1573 - .fn = damon_sysfs_repeat_call_fn, 1574 - .repeat = true, 1575 - }; 1576 - 1577 1568 static int damon_sysfs_turn_damon_on(struct damon_sysfs_kdamond *kdamond) 1578 1569 { 1579 1570 struct damon_ctx *ctx; 1571 + struct damon_call_control *repeat_call_control; 1580 1572 int err; 1581 1573 1582 1574 if (damon_sysfs_kdamond_running(kdamond)) ··· 1585 1585 damon_destroy_ctx(kdamond->damon_ctx); 1586 1586 kdamond->damon_ctx = NULL; 1587 1587 1588 + repeat_call_control = kmalloc(sizeof(*repeat_call_control), 1589 + GFP_KERNEL); 1590 + if (!repeat_call_control) 1591 + return -ENOMEM; 1592 + 1588 1593 ctx = damon_sysfs_build_ctx(kdamond->contexts->contexts_arr[0]); 1589 - if (IS_ERR(ctx)) 1594 + if (IS_ERR(ctx)) { 1595 + kfree(repeat_call_control); 1590 1596 return PTR_ERR(ctx); 1597 + } 1591 1598 err = damon_start(&ctx, 1, false); 1592 1599 if (err) { 1600 + kfree(repeat_call_control); 1593 1601 damon_destroy_ctx(ctx); 1594 1602 return err; 1595 1603 } 1596 1604 kdamond->damon_ctx = ctx; 1597 1605 1598 - damon_sysfs_repeat_call_control.data = kdamond; 1599 - damon_call(ctx, &damon_sysfs_repeat_call_control); 1606 + repeat_call_control->fn = damon_sysfs_repeat_call_fn; 1607 + repeat_call_control->data = kdamond; 1608 + repeat_call_control->repeat = true; 1609 + repeat_call_control->dealloc_on_cancel = true; 1610 + damon_call(ctx, repeat_call_control); 1600 1611 return err; 1601 1612 }
+11 -3
mm/gup.c
··· 2287 2287 struct pages_or_folios *pofs) 2288 2288 { 2289 2289 unsigned long collected = 0; 2290 - bool drain_allow = true; 2291 2290 struct folio *folio; 2291 + int drained = 0; 2292 2292 long i = 0; 2293 2293 2294 2294 for (folio = pofs_get_folio(pofs, i); folio; ··· 2307 2307 continue; 2308 2308 } 2309 2309 2310 - if (!folio_test_lru(folio) && drain_allow) { 2310 + if (drained == 0 && folio_may_be_lru_cached(folio) && 2311 + folio_ref_count(folio) != 2312 + folio_expected_ref_count(folio) + 1) { 2313 + lru_add_drain(); 2314 + drained = 1; 2315 + } 2316 + if (drained == 1 && folio_may_be_lru_cached(folio) && 2317 + folio_ref_count(folio) != 2318 + folio_expected_ref_count(folio) + 1) { 2311 2319 lru_add_drain_all(); 2312 - drain_allow = false; 2320 + drained = 2; 2313 2321 } 2314 2322 2315 2323 if (!folio_isolate_lru(folio))
+6 -3
mm/hugetlb.c
··· 5854 5854 spinlock_t *ptl; 5855 5855 struct hstate *h = hstate_vma(vma); 5856 5856 unsigned long sz = huge_page_size(h); 5857 - bool adjust_reservation = false; 5857 + bool adjust_reservation; 5858 5858 unsigned long last_addr_mask; 5859 5859 bool force_flush = false; 5860 5860 ··· 5947 5947 sz); 5948 5948 hugetlb_count_sub(pages_per_huge_page(h), mm); 5949 5949 hugetlb_remove_rmap(folio); 5950 + spin_unlock(ptl); 5950 5951 5951 5952 /* 5952 5953 * Restore the reservation for anonymous page, otherwise the ··· 5955 5954 * If there we are freeing a surplus, do not set the restore 5956 5955 * reservation bit. 5957 5956 */ 5957 + adjust_reservation = false; 5958 + 5959 + spin_lock_irq(&hugetlb_lock); 5958 5960 if (!h->surplus_huge_pages && __vma_private_lock(vma) && 5959 5961 folio_test_anon(folio)) { 5960 5962 folio_set_hugetlb_restore_reserve(folio); 5961 5963 /* Reservation to be adjusted after the spin lock */ 5962 5964 adjust_reservation = true; 5963 5965 } 5964 - 5965 - spin_unlock(ptl); 5966 + spin_unlock_irq(&hugetlb_lock); 5966 5967 5967 5968 /* 5968 5969 * Adjust the reservation for the region that will have the
+24 -7
mm/kasan/shadow.c
··· 336 336 } 337 337 } 338 338 339 - static int ___alloc_pages_bulk(struct page **pages, int nr_pages) 339 + static int ___alloc_pages_bulk(struct page **pages, int nr_pages, gfp_t gfp_mask) 340 340 { 341 341 unsigned long nr_populated, nr_total = nr_pages; 342 342 struct page **page_array = pages; 343 343 344 344 while (nr_pages) { 345 - nr_populated = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages); 345 + nr_populated = alloc_pages_bulk(gfp_mask, nr_pages, pages); 346 346 if (!nr_populated) { 347 347 ___free_pages_bulk(page_array, nr_total - nr_pages); 348 348 return -ENOMEM; ··· 354 354 return 0; 355 355 } 356 356 357 - static int __kasan_populate_vmalloc(unsigned long start, unsigned long end) 357 + static int __kasan_populate_vmalloc(unsigned long start, unsigned long end, gfp_t gfp_mask) 358 358 { 359 359 unsigned long nr_pages, nr_total = PFN_UP(end - start); 360 360 struct vmalloc_populate_data data; 361 + unsigned int flags; 361 362 int ret = 0; 362 363 363 - data.pages = (struct page **)__get_free_page(GFP_KERNEL | __GFP_ZERO); 364 + data.pages = (struct page **)__get_free_page(gfp_mask | __GFP_ZERO); 364 365 if (!data.pages) 365 366 return -ENOMEM; 366 367 367 368 while (nr_total) { 368 369 nr_pages = min(nr_total, PAGE_SIZE / sizeof(data.pages[0])); 369 - ret = ___alloc_pages_bulk(data.pages, nr_pages); 370 + ret = ___alloc_pages_bulk(data.pages, nr_pages, gfp_mask); 370 371 if (ret) 371 372 break; 372 373 373 374 data.start = start; 375 + 376 + /* 377 + * page tables allocations ignore external gfp mask, enforce it 378 + * by the scope API 379 + */ 380 + if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO) 381 + flags = memalloc_nofs_save(); 382 + else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0) 383 + flags = memalloc_noio_save(); 384 + 374 385 ret = apply_to_page_range(&init_mm, start, nr_pages * PAGE_SIZE, 375 386 kasan_populate_vmalloc_pte, &data); 387 + 388 + if ((gfp_mask & (__GFP_FS | __GFP_IO)) == __GFP_IO) 389 + memalloc_nofs_restore(flags); 390 + else if ((gfp_mask & (__GFP_FS | __GFP_IO)) == 0) 391 + memalloc_noio_restore(flags); 392 + 376 393 ___free_pages_bulk(data.pages, nr_pages); 377 394 if (ret) 378 395 break; ··· 403 386 return ret; 404 387 } 405 388 406 - int kasan_populate_vmalloc(unsigned long addr, unsigned long size) 389 + int kasan_populate_vmalloc(unsigned long addr, unsigned long size, gfp_t gfp_mask) 407 390 { 408 391 unsigned long shadow_start, shadow_end; 409 392 int ret; ··· 432 415 shadow_start = PAGE_ALIGN_DOWN(shadow_start); 433 416 shadow_end = PAGE_ALIGN(shadow_end); 434 417 435 - ret = __kasan_populate_vmalloc(shadow_start, shadow_end); 418 + ret = __kasan_populate_vmalloc(shadow_start, shadow_end, gfp_mask); 436 419 if (ret) 437 420 return ret;
+2 -2
mm/khugepaged.c
··· 1417 1417 */ 1418 1418 if (cc->is_khugepaged && 1419 1419 (pte_young(pteval) || folio_test_young(folio) || 1420 - folio_test_referenced(folio) || mmu_notifier_test_young(vma->vm_mm, 1421 - address))) 1420 + folio_test_referenced(folio) || 1421 + mmu_notifier_test_young(vma->vm_mm, _address))) 1422 1422 referenced++; 1423 1423 } 1424 1424 if (!writable) {
+9 -11
mm/memory-failure.c
··· 956 956 [MF_MSG_BUDDY] = "free buddy page", 957 957 [MF_MSG_DAX] = "dax page", 958 958 [MF_MSG_UNSPLIT_THP] = "unsplit thp", 959 - [MF_MSG_ALREADY_POISONED] = "already poisoned", 959 + [MF_MSG_ALREADY_POISONED] = "already poisoned page", 960 960 [MF_MSG_UNKNOWN] = "unknown page", 961 961 }; 962 962 ··· 1349 1349 { 1350 1350 trace_memory_failure_event(pfn, type, result); 1351 1351 1352 - num_poisoned_pages_inc(pfn); 1353 - 1354 - update_per_node_mf_stats(pfn, result); 1352 + if (type != MF_MSG_ALREADY_POISONED) { 1353 + num_poisoned_pages_inc(pfn); 1354 + update_per_node_mf_stats(pfn, result); 1355 + } 1355 1356 1356 1357 pr_err("%#lx: recovery action for %s: %s\n", 1357 1358 pfn, action_page_types[type], action_name[result]); ··· 2095 2094 *hugetlb = 0; 2096 2095 return 0; 2097 2096 } else if (res == -EHWPOISON) { 2098 - pr_err("%#lx: already hardware poisoned\n", pfn); 2099 2097 if (flags & MF_ACTION_REQUIRED) { 2100 2098 folio = page_folio(p); 2101 2099 res = kill_accessing_process(current, folio_pfn(folio), flags); 2102 - action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED); 2103 2100 } 2101 + action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED); 2104 2102 return res; 2105 2103 } else if (res == -EBUSY) { 2106 2104 if (!(flags & MF_NO_RETRY)) { ··· 2285 2285 goto unlock_mutex; 2286 2286 2287 2287 if (TestSetPageHWPoison(p)) { 2288 - pr_err("%#lx: already hardware poisoned\n", pfn); 2289 2288 res = -EHWPOISON; 2290 2289 if (flags & MF_ACTION_REQUIRED) 2291 2290 res = kill_accessing_process(current, pfn, flags); ··· 2568 2569 static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, 2569 2570 DEFAULT_RATELIMIT_BURST); 2570 2571 2571 - if (!pfn_valid(pfn)) 2572 - return -ENXIO; 2573 - 2574 - p = pfn_to_page(pfn); 2572 + p = pfn_to_online_page(pfn); 2573 + if (!p) 2574 + return -EIO; 2575 2575 folio = page_folio(p); 2576 2576 2577 2577 mutex_lock(&mf_mutex);
+8 -2
mm/memory_hotplug.c
··· 1815 1815 pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1; 1816 1816 1817 1817 if (folio_contain_hwpoisoned_page(folio)) { 1818 - if (WARN_ON(folio_test_lru(folio))) 1819 - folio_isolate_lru(folio); 1818 + /* 1819 + * unmap_poisoned_folio() cannot handle large folios 1820 + * in all cases yet. 1821 + */ 1822 + if (folio_test_large(folio) && !folio_test_hugetlb(folio)) 1823 + goto put_folio; 1824 + if (folio_test_lru(folio) && !folio_isolate_lru(folio)) 1825 + goto put_folio; 1820 1826 if (folio_mapped(folio)) { 1821 1827 folio_lock(folio); 1822 1828 unmap_poisoned_folio(folio, pfn, false);
+3 -3
mm/mlock.c
··· 255 255 256 256 folio_get(folio); 257 257 if (!folio_batch_add(fbatch, mlock_lru(folio)) || 258 - folio_test_large(folio) || lru_cache_disabled()) 258 + !folio_may_be_lru_cached(folio) || lru_cache_disabled()) 259 259 mlock_folio_batch(fbatch); 260 260 local_unlock(&mlock_fbatch.lock); 261 261 } ··· 278 278 279 279 folio_get(folio); 280 280 if (!folio_batch_add(fbatch, mlock_new(folio)) || 281 - folio_test_large(folio) || lru_cache_disabled()) 281 + !folio_may_be_lru_cached(folio) || lru_cache_disabled()) 282 282 mlock_folio_batch(fbatch); 283 283 local_unlock(&mlock_fbatch.lock); 284 284 } ··· 299 299 */ 300 300 folio_get(folio); 301 301 if (!folio_batch_add(fbatch, folio) || 302 - folio_test_large(folio) || lru_cache_disabled()) 302 + !folio_may_be_lru_cached(folio) || lru_cache_disabled()) 303 303 mlock_folio_batch(fbatch); 304 304 local_unlock(&mlock_fbatch.lock); 305 305 }
+6 -3
mm/mremap.c
··· 1774 1774 if (!vrm->new_len) 1775 1775 return -EINVAL; 1776 1776 1777 - /* Is the new length or address silly? */ 1778 - if (vrm->new_len > TASK_SIZE || 1779 - vrm->new_addr > TASK_SIZE - vrm->new_len) 1777 + /* Is the new length silly? */ 1778 + if (vrm->new_len > TASK_SIZE) 1780 1779 return -EINVAL; 1781 1780 1782 1781 /* Remainder of checks are for cases with specific new_addr. */ 1783 1782 if (!vrm_implies_new_addr(vrm)) 1784 1783 return 0; 1784 + 1785 + /* Is the new address silly? */ 1786 + if (vrm->new_addr > TASK_SIZE - vrm->new_len) 1787 + return -EINVAL; 1785 1788 1786 1789 /* The new address must be page-aligned. */ 1787 1790 if (offset_in_page(vrm->new_addr))
+12 -8
mm/percpu.c
··· 1734 1734 bool is_atomic; 1735 1735 bool do_warn; 1736 1736 struct obj_cgroup *objcg = NULL; 1737 - static int warn_limit = 10; 1737 + static atomic_t warn_limit = ATOMIC_INIT(10); 1738 1738 struct pcpu_chunk *chunk, *next; 1739 1739 const char *err; 1740 1740 int slot, off, cpu, ret; ··· 1904 1904 fail: 1905 1905 trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align); 1906 1906 1907 - if (do_warn && warn_limit) { 1908 - pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n", 1909 - size, align, is_atomic, err); 1910 - if (!is_atomic) 1911 - dump_stack(); 1912 - if (!--warn_limit) 1913 - pr_info("limit reached, disable warning\n"); 1907 + if (do_warn) { 1908 + int remaining = atomic_dec_if_positive(&warn_limit); 1909 + 1910 + if (remaining >= 0) { 1911 + pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n", 1912 + size, align, is_atomic, err); 1913 + if (!is_atomic) 1914 + dump_stack(); 1915 + if (remaining == 0) 1916 + pr_info("limit reached, disable warning\n"); 1917 + } 1914 1918 } 1915 1919 1916 1920 if (is_atomic) {
+26 -24
mm/swap.c
··· 164 164 for (i = 0; i < folio_batch_count(fbatch); i++) { 165 165 struct folio *folio = fbatch->folios[i]; 166 166 167 + /* block memcg migration while the folio moves between lru */ 168 + if (move_fn != lru_add && !folio_test_clear_lru(folio)) 169 + continue; 170 + 167 171 folio_lruvec_relock_irqsave(folio, &lruvec, &flags); 168 172 move_fn(lruvec, folio); 169 173 ··· 180 176 } 181 177 182 178 static void __folio_batch_add_and_move(struct folio_batch __percpu *fbatch, 183 - struct folio *folio, move_fn_t move_fn, 184 - bool on_lru, bool disable_irq) 179 + struct folio *folio, move_fn_t move_fn, bool disable_irq) 185 180 { 186 181 unsigned long flags; 187 - 188 - if (on_lru && !folio_test_clear_lru(folio)) 189 - return; 190 182 191 183 folio_get(folio); 192 184 ··· 191 191 else 192 192 local_lock(&cpu_fbatches.lock); 193 193 194 - if (!folio_batch_add(this_cpu_ptr(fbatch), folio) || folio_test_large(folio) || 195 - lru_cache_disabled()) 194 + if (!folio_batch_add(this_cpu_ptr(fbatch), folio) || 195 + !folio_may_be_lru_cached(folio) || lru_cache_disabled()) 196 196 folio_batch_move_lru(this_cpu_ptr(fbatch), move_fn); 197 197 198 198 if (disable_irq) ··· 201 201 local_unlock(&cpu_fbatches.lock); 202 202 } 203 203 204 - #define folio_batch_add_and_move(folio, op, on_lru) \ 205 - __folio_batch_add_and_move( \ 206 - &cpu_fbatches.op, \ 207 - folio, \ 208 - op, \ 209 - on_lru, \ 210 - offsetof(struct cpu_fbatches, op) >= offsetof(struct cpu_fbatches, lock_irq) \ 204 + #define folio_batch_add_and_move(folio, op) \ 205 + __folio_batch_add_and_move( \ 206 + &cpu_fbatches.op, \ 207 + folio, \ 208 + op, \ 209 + offsetof(struct cpu_fbatches, op) >= \ 210 + offsetof(struct cpu_fbatches, lock_irq) \ 211 211 ) 212 212 213 213 static void lru_move_tail(struct lruvec *lruvec, struct folio *folio) ··· 231 231 void folio_rotate_reclaimable(struct folio *folio) 232 232 { 233 233 if (folio_test_locked(folio) || folio_test_dirty(folio) || 234 - folio_test_unevictable(folio)) 234 + folio_test_unevictable(folio) || !folio_test_lru(folio)) 235 235 return; 236 236 237 - folio_batch_add_and_move(folio, lru_move_tail, true); 237 + folio_batch_add_and_move(folio, lru_move_tail); 238 238 } 239 239 240 240 void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file, ··· 328 328 329 329 void folio_activate(struct folio *folio) 330 330 { 331 - if (folio_test_active(folio) || folio_test_unevictable(folio)) 331 + if (folio_test_active(folio) || folio_test_unevictable(folio) || 332 + !folio_test_lru(folio)) 332 333 return; 333 334 334 - folio_batch_add_and_move(folio, lru_activate, true); 335 + folio_batch_add_and_move(folio, lru_activate); 335 336 } 336 337 337 338 #else ··· 508 507 lru_gen_in_fault() && !(current->flags & PF_MEMALLOC)) 509 508 folio_set_active(folio); 510 509 511 - folio_batch_add_and_move(folio, lru_add, false); 510 + folio_batch_add_and_move(folio, lru_add); 512 511 } 513 512 EXPORT_SYMBOL(folio_add_lru); 514 513 ··· 686 685 void deactivate_file_folio(struct folio *folio) 687 686 { 688 687 /* Deactivating an unevictable folio will not accelerate reclaim */ 689 - if (folio_test_unevictable(folio)) 688 + if (folio_test_unevictable(folio) || !folio_test_lru(folio)) 690 689 return; 691 690 692 691 if (lru_gen_enabled() && lru_gen_clear_refs(folio)) 693 692 return; 694 693 695 - folio_batch_add_and_move(folio, lru_deactivate_file, true); 694 + folio_batch_add_and_move(folio, lru_deactivate_file); 696 695 } 697 696 698 697 /* ··· 705 704 */ 706 705 void folio_deactivate(struct folio *folio) 707 706 { 708 - if (folio_test_unevictable(folio)) 707 + if (folio_test_unevictable(folio) || !folio_test_lru(folio)) 709 708 return; 710 709 711 710 if (lru_gen_enabled() ? lru_gen_clear_refs(folio) : !folio_test_active(folio)) 712 711 return; 713 712 714 - folio_batch_add_and_move(folio, lru_deactivate, true); 713 + folio_batch_add_and_move(folio, lru_deactivate); 715 714 } 716 715 717 716 /** ··· 724 723 void folio_mark_lazyfree(struct folio *folio) 725 724 { 726 725 if (!folio_test_anon(folio) || !folio_test_swapbacked(folio) || 726 + !folio_test_lru(folio) || 727 727 folio_test_swapcache(folio) || folio_test_unevictable(folio)) 728 728 return; 729 729 730 - folio_batch_add_and_move(folio, lru_lazyfree, true); 730 + folio_batch_add_and_move(folio, lru_lazyfree); 731 731 } 732 732 733 733 void lru_add_drain(void)
+4 -4
mm/vmalloc.c
··· 2026 2026 if (unlikely(!vmap_initialized)) 2027 2027 return ERR_PTR(-EBUSY); 2028 2028 2029 + /* Only reclaim behaviour flags are relevant. */ 2030 + gfp_mask = gfp_mask & GFP_RECLAIM_MASK; 2029 2031 might_sleep(); 2030 2032 2031 2033 /* ··· 2040 2038 */ 2041 2039 va = node_alloc(size, align, vstart, vend, &addr, &vn_id); 2042 2040 if (!va) { 2043 - gfp_mask = gfp_mask & GFP_RECLAIM_MASK; 2044 - 2045 2041 va = kmem_cache_alloc_node(vmap_area_cachep, gfp_mask, node); 2046 2042 if (unlikely(!va)) 2047 2043 return ERR_PTR(-ENOMEM); ··· 2089 2089 BUG_ON(va->va_start < vstart); 2090 2090 BUG_ON(va->va_end > vend); 2091 2091 2092 - ret = kasan_populate_vmalloc(addr, size); 2092 + ret = kasan_populate_vmalloc(addr, size, gfp_mask); 2093 2093 if (ret) { 2094 2094 free_vmap_area(va); 2095 2095 return ERR_PTR(ret); ··· 4845 4845 4846 4846 /* populate the kasan shadow space */ 4847 4847 for (area = 0; area < nr_vms; area++) { 4848 - if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area])) 4848 + if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area], GFP_KERNEL)) 4849 4849 goto err_free_shadow; 4850 4850 } 4851 4851
+1 -1
mm/vmscan.c
··· 4500 4500 } 4501 4501 4502 4502 /* ineligible */ 4503 - if (!folio_test_lru(folio) || zone > sc->reclaim_idx) { 4503 + if (zone > sc->reclaim_idx) { 4504 4504 gen = folio_inc_gen(lruvec, folio, false); 4505 4505 list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]); 4506 4506 return true;
+3
samples/damon/mtier.c
··· 208 208 if (enabled == is_enabled) 209 209 return 0; 210 210 211 + if (!init_called) 212 + return 0; 213 + 211 214 if (enabled) { 212 215 err = damon_sample_mtier_start(); 213 216 if (err)
+3
samples/damon/prcl.c
··· 137 137 if (enabled == is_enabled) 138 138 return 0; 139 139 140 + if (!init_called) 141 + return 0; 142 + 140 143 if (enabled) { 141 144 err = damon_sample_prcl_start(); 142 145 if (err)
+3
samples/damon/wsse.c
··· 118 118 return 0; 119 119 120 120 if (enabled) { 121 + if (!init_called) 122 + return 0; 123 + 121 124 err = damon_sample_wsse_start(); 122 125 if (err) 123 126 enabled = false;