Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'mm-hotfixes-stable-2026-01-29-09-41' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
"16 hotfixes. 9 are cc:stable, 12 are for MM.

There's a patch series from Pratyush Yadav which fixes a few things in
the new-in-6.19 LUO memfd code.

Plus the usual shower of singletons - please see the changelogs for
details"

* tag 'mm-hotfixes-stable-2026-01-29-09-41' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
vmcoreinfo: make hwerr_data visible for debugging
mm/zone_device: reinitialize large zone device private folios
mm/mm_init: don't cond_resched() in deferred_init_memmap_chunk() if called from deferred_grow_zone()
mm/kfence: randomize the freelist on initialization
kho: kho_preserve_vmalloc(): don't return 0 when ENOMEM
kho: init alloc tags when restoring pages from reserved memory
mm: memfd_luo: restore and free memfd_luo_ser on failure
mm: memfd_luo: use memfd_alloc_file() instead of shmem_file_setup()
memfd: export alloc_file()
flex_proportions: make fprop_new_period() hardirq safe
mailmap: add entry for Viacheslav Bocharov
mm/memory-failure: teach kill_accessing_process to accept hugetlb tail page pfn
mm/memory-failure: fix missing ->mf_stats count in hugetlb poison
mm, swap: restore swap_space attr to avoid kernel panic
mm/kasan: fix KASAN poisoning in vrealloc()
mm/shmem, swap: fix race of truncate and swap entry split

+239 -87
+1
.mailmap
··· 851 851 Veera Sundaram Sankaran <quic_veeras@quicinc.com> <veeras@codeaurora.org> 852 852 Veerabhadrarao Badiganti <quic_vbadigan@quicinc.com> <vbadigan@codeaurora.org> 853 853 Venkateswara Naralasetty <quic_vnaralas@quicinc.com> <vnaralas@codeaurora.org> 854 + Viacheslav Bocharov <v@baodeep.com> <adeep@lexina.in> 854 855 Vikash Garodia <vikash.garodia@oss.qualcomm.com> <vgarodia@codeaurora.org> 855 856 Vikash Garodia <vikash.garodia@oss.qualcomm.com> <quic_vgarodia@quicinc.com> 856 857 Vincent Mailhol <mailhol@kernel.org> <mailhol.vincent@wanadoo.fr>
+1 -1
arch/powerpc/kvm/book3s_hv_uvmem.c
··· 723 723 724 724 dpage = pfn_to_page(uvmem_pfn); 725 725 dpage->zone_device_data = pvt; 726 - zone_device_page_init(dpage, 0); 726 + zone_device_page_init(dpage, &kvmppc_uvmem_pgmap, 0); 727 727 return dpage; 728 728 out_clear: 729 729 spin_lock(&kvmppc_uvmem_bitmap_lock);
+1 -1
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
··· 217 217 page = pfn_to_page(pfn); 218 218 svm_range_bo_ref(prange->svm_bo); 219 219 page->zone_device_data = prange->svm_bo; 220 - zone_device_page_init(page, 0); 220 + zone_device_page_init(page, page_pgmap(page), 0); 221 221 } 222 222 223 223 static void
+1 -1
drivers/gpu/drm/drm_pagemap.c
··· 197 197 struct drm_pagemap_zdd *zdd) 198 198 { 199 199 page->zone_device_data = drm_pagemap_zdd_get(zdd); 200 - zone_device_page_init(page, 0); 200 + zone_device_page_init(page, page_pgmap(page), 0); 201 201 } 202 202 203 203 /**
+1 -1
drivers/gpu/drm/nouveau/nouveau_dmem.c
··· 425 425 order = ilog2(DMEM_CHUNK_NPAGES); 426 426 } 427 427 428 - zone_device_folio_init(folio, order); 428 + zone_device_folio_init(folio, page_pgmap(folio_page(folio, 0)), order); 429 429 return page; 430 430 } 431 431
+14
include/linux/kasan.h
··· 641 641 __kasan_unpoison_vmap_areas(vms, nr_vms, flags); 642 642 } 643 643 644 + void __kasan_vrealloc(const void *start, unsigned long old_size, 645 + unsigned long new_size); 646 + 647 + static __always_inline void kasan_vrealloc(const void *start, 648 + unsigned long old_size, 649 + unsigned long new_size) 650 + { 651 + if (kasan_enabled()) 652 + __kasan_vrealloc(start, old_size, new_size); 653 + } 654 + 644 655 #else /* CONFIG_KASAN_VMALLOC */ 645 656 646 657 static inline void kasan_populate_early_vm_area_shadow(void *start, ··· 680 669 kasan_unpoison_vmap_areas(struct vm_struct **vms, int nr_vms, 681 670 kasan_vmalloc_flags_t flags) 682 671 { } 672 + 673 + static inline void kasan_vrealloc(const void *start, unsigned long old_size, 674 + unsigned long new_size) { } 683 675 684 676 #endif /* CONFIG_KASAN_VMALLOC */ 685 677
+6
include/linux/memfd.h
··· 17 17 * to by vm_flags_ptr. 18 18 */ 19 19 int memfd_check_seals_mmap(struct file *file, vm_flags_t *vm_flags_ptr); 20 + struct file *memfd_alloc_file(const char *name, unsigned int flags); 20 21 #else 21 22 static inline long memfd_fcntl(struct file *f, unsigned int c, unsigned int a) 22 23 { ··· 31 30 vm_flags_t *vm_flags_ptr) 32 31 { 33 32 return 0; 33 + } 34 + 35 + static inline struct file *memfd_alloc_file(const char *name, unsigned int flags) 36 + { 37 + return ERR_PTR(-EINVAL); 34 38 } 35 39 #endif 36 40
+6 -3
include/linux/memremap.h
··· 224 224 } 225 225 226 226 #ifdef CONFIG_ZONE_DEVICE 227 - void zone_device_page_init(struct page *page, unsigned int order); 227 + void zone_device_page_init(struct page *page, struct dev_pagemap *pgmap, 228 + unsigned int order); 228 229 void *memremap_pages(struct dev_pagemap *pgmap, int nid); 229 230 void memunmap_pages(struct dev_pagemap *pgmap); 230 231 void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); ··· 235 234 236 235 unsigned long memremap_compat_align(void); 237 236 238 - static inline void zone_device_folio_init(struct folio *folio, unsigned int order) 237 + static inline void zone_device_folio_init(struct folio *folio, 238 + struct dev_pagemap *pgmap, 239 + unsigned int order) 239 240 { 240 - zone_device_page_init(&folio->page, order); 241 + zone_device_page_init(&folio->page, pgmap, order); 241 242 if (order) 242 243 folio_set_large_rmappable(folio); 243 244 }
+11 -1
kernel/liveupdate/kexec_handover.c
··· 255 255 if (is_folio && info.order) 256 256 prep_compound_page(page, info.order); 257 257 258 + /* Always mark headpage's codetag as empty to avoid accounting mismatch */ 259 + clear_page_tag_ref(page); 260 + if (!is_folio) { 261 + /* Also do that for the non-compound tail pages */ 262 + for (unsigned int i = 1; i < nr_pages; i++) 263 + clear_page_tag_ref(page + i); 264 + } 265 + 258 266 adjust_managed_page_count(page, nr_pages); 259 267 return page; 260 268 } ··· 1014 1006 chunk->phys[idx++] = phys; 1015 1007 if (idx == ARRAY_SIZE(chunk->phys)) { 1016 1008 chunk = new_vmalloc_chunk(chunk); 1017 - if (!chunk) 1009 + if (!chunk) { 1010 + err = -ENOMEM; 1018 1011 goto err_free; 1012 + } 1019 1013 idx = 0; 1020 1014 } 1021 1015 }
+5 -1
kernel/vmcore_info.c
··· 36 36 time64_t timestamp; 37 37 }; 38 38 39 - static struct hwerr_info hwerr_data[HWERR_RECOV_MAX]; 39 + /* 40 + * The hwerr_data[] array is declared with global scope so that it remains 41 + * accessible to vmcoreinfo even when Link Time Optimization (LTO) is enabled. 42 + */ 43 + struct hwerr_info hwerr_data[HWERR_RECOV_MAX]; 40 44 41 45 Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, 42 46 void *data, size_t data_len)
+3 -2
lib/flex_proportions.c
··· 64 64 bool fprop_new_period(struct fprop_global *p, int periods) 65 65 { 66 66 s64 events = percpu_counter_sum(&p->events); 67 + unsigned long flags; 67 68 68 69 /* 69 70 * Don't do anything if there are no events. 70 71 */ 71 72 if (events <= 1) 72 73 return false; 73 - preempt_disable_nested(); 74 + local_irq_save(flags); 74 75 write_seqcount_begin(&p->sequence); 75 76 if (periods < 64) 76 77 events -= events >> periods; ··· 79 78 percpu_counter_add(&p->events, -events); 80 79 p->period += periods; 81 80 write_seqcount_end(&p->sequence); 82 - preempt_enable_nested(); 81 + local_irq_restore(flags); 83 82 84 83 return true; 85 84 }
+3 -1
lib/test_hmm.c
··· 662 662 goto error; 663 663 } 664 664 665 - zone_device_folio_init(page_folio(dpage), order); 665 + zone_device_folio_init(page_folio(dpage), 666 + page_pgmap(folio_page(page_folio(dpage), 0)), 667 + order); 666 668 dpage->zone_device_data = rpage; 667 669 return dpage; 668 670
+21
mm/kasan/common.c
··· 606 606 __kasan_unpoison_vmalloc(addr, size, flags | KASAN_VMALLOC_KEEP_TAG); 607 607 } 608 608 } 609 + 610 + void __kasan_vrealloc(const void *addr, unsigned long old_size, 611 + unsigned long new_size) 612 + { 613 + if (new_size < old_size) { 614 + kasan_poison_last_granule(addr, new_size); 615 + 616 + new_size = round_up(new_size, KASAN_GRANULE_SIZE); 617 + old_size = round_up(old_size, KASAN_GRANULE_SIZE); 618 + if (new_size < old_size) 619 + __kasan_poison_vmalloc(addr + new_size, 620 + old_size - new_size); 621 + } else if (new_size > old_size) { 622 + old_size = round_down(old_size, KASAN_GRANULE_SIZE); 623 + __kasan_unpoison_vmalloc(addr + old_size, 624 + new_size - old_size, 625 + KASAN_VMALLOC_PROT_NORMAL | 626 + KASAN_VMALLOC_VM_ALLOC | 627 + KASAN_VMALLOC_KEEP_TAG); 628 + } 629 + } 609 630 #endif
+19 -4
mm/kfence/core.c
··· 596 596 static unsigned long kfence_init_pool(void) 597 597 { 598 598 unsigned long addr, start_pfn; 599 - int i; 599 + int i, rand; 600 600 601 601 if (!arch_kfence_init_pool()) 602 602 return (unsigned long)__kfence_pool; ··· 647 647 INIT_LIST_HEAD(&meta->list); 648 648 raw_spin_lock_init(&meta->lock); 649 649 meta->state = KFENCE_OBJECT_UNUSED; 650 - meta->addr = addr; /* Initialize for validation in metadata_to_pageaddr(). */ 651 - list_add_tail(&meta->list, &kfence_freelist); 650 + /* Use addr to randomize the freelist. */ 651 + meta->addr = i; 652 652 653 653 /* Protect the right redzone. */ 654 - if (unlikely(!kfence_protect(addr + PAGE_SIZE))) 654 + if (unlikely(!kfence_protect(addr + 2 * i * PAGE_SIZE + PAGE_SIZE))) 655 655 goto reset_slab; 656 + } 656 657 658 + for (i = CONFIG_KFENCE_NUM_OBJECTS; i > 0; i--) { 659 + rand = get_random_u32_below(i); 660 + swap(kfence_metadata_init[i - 1].addr, kfence_metadata_init[rand].addr); 661 + } 662 + 663 + for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) { 664 + struct kfence_metadata *meta_1 = &kfence_metadata_init[i]; 665 + struct kfence_metadata *meta_2 = &kfence_metadata_init[meta_1->addr]; 666 + 667 + list_add_tail(&meta_2->list, &kfence_freelist); 668 + } 669 + for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) { 670 + kfence_metadata_init[i].addr = addr; 657 671 addr += 2 * PAGE_SIZE; 658 672 } 659 673 ··· 680 666 return 0; 681 667 682 668 reset_slab: 669 + addr += 2 * i * PAGE_SIZE; 683 670 for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) { 684 671 struct page *page; 685 672
+2 -2
mm/memfd.c
··· 456 456 return ERR_PTR(error); 457 457 } 458 458 459 - static struct file *alloc_file(const char *name, unsigned int flags) 459 + struct file *memfd_alloc_file(const char *name, unsigned int flags) 460 460 { 461 461 unsigned int *file_seals; 462 462 struct file *file; ··· 520 520 return PTR_ERR(name); 521 521 522 522 fd_flags = (flags & MFD_CLOEXEC) ? O_CLOEXEC : 0; 523 - return FD_ADD(fd_flags, alloc_file(name, flags)); 523 + return FD_ADD(fd_flags, memfd_alloc_file(name, flags)); 524 524 }
+6 -4
mm/memfd_luo.c
··· 78 78 #include <linux/liveupdate.h> 79 79 #include <linux/shmem_fs.h> 80 80 #include <linux/vmalloc.h> 81 + #include <linux/memfd.h> 81 82 #include "internal.h" 82 83 83 84 static int memfd_luo_preserve_folios(struct file *file, ··· 444 443 if (!ser) 445 444 return -EINVAL; 446 445 447 - file = shmem_file_setup("", 0, VM_NORESERVE); 448 - 446 + file = memfd_alloc_file("", 0); 449 447 if (IS_ERR(file)) { 450 448 pr_err("failed to setup file: %pe\n", file); 451 - return PTR_ERR(file); 449 + err = PTR_ERR(file); 450 + goto free_ser; 452 451 } 453 452 454 453 vfs_setpos(file, ser->pos, MAX_LFS_FILESIZE); ··· 474 473 475 474 put_file: 476 475 fput(file); 477 - 476 + free_ser: 477 + kho_restore_free(ser); 478 478 return err; 479 479 } 480 480
+60 -39
mm/memory-failure.c
··· 692 692 unsigned long poisoned_pfn, struct to_kill *tk) 693 693 { 694 694 unsigned long pfn = 0; 695 + unsigned long hwpoison_vaddr; 696 + unsigned long mask; 695 697 696 698 if (pte_present(pte)) { 697 699 pfn = pte_pfn(pte); ··· 704 702 pfn = softleaf_to_pfn(entry); 705 703 } 706 704 707 - if (!pfn || pfn != poisoned_pfn) 705 + mask = ~((1UL << (shift - PAGE_SHIFT)) - 1); 706 + if (!pfn || pfn != (poisoned_pfn & mask)) 708 707 return 0; 709 708 710 - set_to_kill(tk, addr, shift); 709 + hwpoison_vaddr = addr + ((poisoned_pfn - pfn) << PAGE_SHIFT); 710 + set_to_kill(tk, hwpoison_vaddr, shift); 711 711 return 1; 712 712 } 713 713 ··· 1887 1883 return count; 1888 1884 } 1889 1885 1890 - static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page) 1886 + #define MF_HUGETLB_FREED 0 /* freed hugepage */ 1887 + #define MF_HUGETLB_IN_USED 1 /* in-use hugepage */ 1888 + #define MF_HUGETLB_NON_HUGEPAGE 2 /* not a hugepage */ 1889 + #define MF_HUGETLB_FOLIO_PRE_POISONED 3 /* folio already poisoned */ 1890 + #define MF_HUGETLB_PAGE_PRE_POISONED 4 /* exact page already poisoned */ 1891 + #define MF_HUGETLB_RETRY 5 /* hugepage is busy, retry */ 1892 + /* 1893 + * Set hugetlb folio as hwpoisoned, update folio private raw hwpoison list 1894 + * to keep track of the poisoned pages. 1895 + */ 1896 + static int hugetlb_update_hwpoison(struct folio *folio, struct page *page) 1891 1897 { 1892 1898 struct llist_head *head; 1893 1899 struct raw_hwp_page *raw_hwp; 1894 1900 struct raw_hwp_page *p; 1895 - int ret = folio_test_set_hwpoison(folio) ? -EHWPOISON : 0; 1901 + int ret = folio_test_set_hwpoison(folio) ? MF_HUGETLB_FOLIO_PRE_POISONED : 0; 1896 1902 1897 1903 /* 1898 1904 * Once the hwpoison hugepage has lost reliable raw error info, ··· 1910 1896 * so skip to add additional raw error info. 
1911 1897 */ 1912 1898 if (folio_test_hugetlb_raw_hwp_unreliable(folio)) 1913 - return -EHWPOISON; 1899 + return MF_HUGETLB_FOLIO_PRE_POISONED; 1914 1900 head = raw_hwp_list_head(folio); 1915 1901 llist_for_each_entry(p, head->first, node) { 1916 1902 if (p->page == page) 1917 - return -EHWPOISON; 1903 + return MF_HUGETLB_PAGE_PRE_POISONED; 1918 1904 } 1919 1905 1920 1906 raw_hwp = kmalloc(sizeof(struct raw_hwp_page), GFP_ATOMIC); 1921 1907 if (raw_hwp) { 1922 1908 raw_hwp->page = page; 1923 1909 llist_add(&raw_hwp->node, head); 1924 - /* the first error event will be counted in action_result(). */ 1925 - if (ret) 1926 - num_poisoned_pages_inc(page_to_pfn(page)); 1927 1910 } else { 1928 1911 /* 1929 1912 * Failed to save raw error info. We no longer trace all ··· 1968 1957 1969 1958 /* 1970 1959 * Called from hugetlb code with hugetlb_lock held. 1971 - * 1972 - * Return values: 1973 - * 0 - free hugepage 1974 - * 1 - in-use hugepage 1975 - * 2 - not a hugepage 1976 - * -EBUSY - the hugepage is busy (try to retry) 1977 - * -EHWPOISON - the hugepage is already hwpoisoned 1978 1960 */ 1979 1961 int __get_huge_page_for_hwpoison(unsigned long pfn, int flags, 1980 1962 bool *migratable_cleared) 1981 1963 { 1982 1964 struct page *page = pfn_to_page(pfn); 1983 1965 struct folio *folio = page_folio(page); 1984 - int ret = 2; /* fallback to normal page handling */ 1985 1966 bool count_increased = false; 1967 + int ret, rc; 1986 1968 1987 - if (!folio_test_hugetlb(folio)) 1969 + if (!folio_test_hugetlb(folio)) { 1970 + ret = MF_HUGETLB_NON_HUGEPAGE; 1988 1971 goto out; 1989 - 1990 - if (flags & MF_COUNT_INCREASED) { 1991 - ret = 1; 1972 + } else if (flags & MF_COUNT_INCREASED) { 1973 + ret = MF_HUGETLB_IN_USED; 1992 1974 count_increased = true; 1993 1975 } else if (folio_test_hugetlb_freed(folio)) { 1994 - ret = 0; 1976 + ret = MF_HUGETLB_FREED; 1995 1977 } else if (folio_test_hugetlb_migratable(folio)) { 1996 - ret = folio_try_get(folio); 1997 - if (ret) 1978 + if 
(folio_try_get(folio)) { 1979 + ret = MF_HUGETLB_IN_USED; 1998 1980 count_increased = true; 1981 + } else { 1982 + ret = MF_HUGETLB_FREED; 1983 + } 1999 1984 } else { 2000 - ret = -EBUSY; 1985 + ret = MF_HUGETLB_RETRY; 2001 1986 if (!(flags & MF_NO_RETRY)) 2002 1987 goto out; 2003 1988 } 2004 1989 2005 - if (folio_set_hugetlb_hwpoison(folio, page)) { 2006 - ret = -EHWPOISON; 1990 + rc = hugetlb_update_hwpoison(folio, page); 1991 + if (rc >= MF_HUGETLB_FOLIO_PRE_POISONED) { 1992 + ret = rc; 2007 1993 goto out; 2008 1994 } 2009 1995 ··· 2025 2017 * with basic operations like hugepage allocation/free/demotion. 2026 2018 * So some of prechecks for hwpoison (pinning, and testing/setting 2027 2019 * PageHWPoison) should be done in single hugetlb_lock range. 2020 + * Returns: 2021 + * 0 - not hugetlb, or recovered 2022 + * -EBUSY - not recovered 2023 + * -EOPNOTSUPP - hwpoison_filter'ed 2024 + * -EHWPOISON - folio or exact page already poisoned 2025 + * -EFAULT - kill_accessing_process finds current->mm null 2028 2026 */ 2029 2027 static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb) 2030 2028 { 2031 - int res; 2029 + int res, rv; 2032 2030 struct page *p = pfn_to_page(pfn); 2033 2031 struct folio *folio; 2034 2032 unsigned long page_flags; ··· 2043 2029 *hugetlb = 1; 2044 2030 retry: 2045 2031 res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared); 2046 - if (res == 2) { /* fallback to normal page handling */ 2032 + switch (res) { 2033 + case MF_HUGETLB_NON_HUGEPAGE: /* fallback to normal page handling */ 2047 2034 *hugetlb = 0; 2048 2035 return 0; 2049 - } else if (res == -EHWPOISON) { 2050 - if (flags & MF_ACTION_REQUIRED) { 2051 - folio = page_folio(p); 2052 - res = kill_accessing_process(current, folio_pfn(folio), flags); 2053 - } 2054 - action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED); 2055 - return res; 2056 - } else if (res == -EBUSY) { 2036 + case MF_HUGETLB_RETRY: 2057 2037 if (!(flags & MF_NO_RETRY)) { 2058 2038 
flags |= MF_NO_RETRY; 2059 2039 goto retry; 2060 2040 } 2061 2041 return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED); 2042 + case MF_HUGETLB_FOLIO_PRE_POISONED: 2043 + case MF_HUGETLB_PAGE_PRE_POISONED: 2044 + rv = -EHWPOISON; 2045 + if (flags & MF_ACTION_REQUIRED) 2046 + rv = kill_accessing_process(current, pfn, flags); 2047 + if (res == MF_HUGETLB_PAGE_PRE_POISONED) 2048 + action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED); 2049 + else 2050 + action_result(pfn, MF_MSG_HUGE, MF_FAILED); 2051 + return rv; 2052 + default: 2053 + WARN_ON((res != MF_HUGETLB_FREED) && (res != MF_HUGETLB_IN_USED)); 2054 + break; 2062 2055 } 2063 2056 2064 2057 folio = page_folio(p); ··· 2076 2055 if (migratable_cleared) 2077 2056 folio_set_hugetlb_migratable(folio); 2078 2057 folio_unlock(folio); 2079 - if (res == 1) 2058 + if (res == MF_HUGETLB_IN_USED) 2080 2059 folio_put(folio); 2081 2060 return -EOPNOTSUPP; 2082 2061 } ··· 2085 2064 * Handling free hugepage. The possible race with hugepage allocation 2086 2065 * or demotion can be prevented by PageHWPoison flag. 2087 2066 */ 2088 - if (res == 0) { 2067 + if (res == MF_HUGETLB_FREED) { 2089 2068 folio_unlock(folio); 2090 2069 if (__page_handle_poison(p) > 0) { 2091 2070 page_ref_inc(p);
+34 -1
mm/memremap.c
··· 477 477 } 478 478 } 479 479 480 - void zone_device_page_init(struct page *page, unsigned int order) 480 + void zone_device_page_init(struct page *page, struct dev_pagemap *pgmap, 481 + unsigned int order) 481 482 { 483 + struct page *new_page = page; 484 + unsigned int i; 485 + 482 486 VM_WARN_ON_ONCE(order > MAX_ORDER_NR_PAGES); 487 + 488 + for (i = 0; i < (1UL << order); ++i, ++new_page) { 489 + struct folio *new_folio = (struct folio *)new_page; 490 + 491 + /* 492 + * new_page could have been part of previous higher order folio 493 + * which encodes the order, in page + 1, in the flags bits. We 494 + * blindly clear bits which could have set my order field here, 495 + * including page head. 496 + */ 497 + new_page->flags.f &= ~0xffUL; /* Clear possible order, page head */ 498 + 499 + #ifdef NR_PAGES_IN_LARGE_FOLIO 500 + /* 501 + * This pointer math looks odd, but new_page could have been 502 + * part of a previous higher order folio, which sets _nr_pages 503 + * in page + 1 (new_page). Therefore, we use pointer casting to 504 + * correctly locate the _nr_pages bits within new_page which 505 + * could have modified by previous higher order folio. 506 + */ 507 + ((struct folio *)(new_page - 1))->_nr_pages = 0; 508 + #endif 509 + 510 + new_folio->mapping = NULL; 511 + new_folio->pgmap = pgmap; /* Also clear compound head */ 512 + new_folio->share = 0; /* fsdax only, unused for device private */ 513 + VM_WARN_ON_FOLIO(folio_ref_count(new_folio), new_folio); 514 + VM_WARN_ON_FOLIO(!folio_is_zone_device(new_folio), new_folio); 515 + } 483 516 484 517 /* 485 518 * Drivers shouldn't be allocating pages after calling
+6 -6
mm/mm_init.c
··· 2059 2059 */ 2060 2060 static unsigned long __init 2061 2061 deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn, 2062 - struct zone *zone) 2062 + struct zone *zone, bool can_resched) 2063 2063 { 2064 2064 int nid = zone_to_nid(zone); 2065 2065 unsigned long nr_pages = 0; ··· 2085 2085 2086 2086 spfn = chunk_end; 2087 2087 2088 - if (irqs_disabled()) 2089 - touch_nmi_watchdog(); 2090 - else 2088 + if (can_resched) 2091 2089 cond_resched(); 2090 + else 2091 + touch_nmi_watchdog(); 2092 2092 } 2093 2093 } 2094 2094 ··· 2101 2101 { 2102 2102 struct zone *zone = arg; 2103 2103 2104 - deferred_init_memmap_chunk(start_pfn, end_pfn, zone); 2104 + deferred_init_memmap_chunk(start_pfn, end_pfn, zone, true); 2105 2105 } 2106 2106 2107 2107 static unsigned int __init ··· 2216 2216 for (spfn = first_deferred_pfn, epfn = SECTION_ALIGN_UP(spfn + 1); 2217 2217 nr_pages < nr_pages_needed && spfn < zone_end_pfn(zone); 2218 2218 spfn = epfn, epfn += PAGES_PER_SECTION) { 2219 - nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone); 2219 + nr_pages += deferred_init_memmap_chunk(spfn, epfn, zone, false); 2220 2220 } 2221 2221 2222 2222 /*
+34 -11
mm/shmem.c
··· 962 962 * being freed). 963 963 */ 964 964 static long shmem_free_swap(struct address_space *mapping, 965 - pgoff_t index, void *radswap) 965 + pgoff_t index, pgoff_t end, void *radswap) 966 966 { 967 - int order = xa_get_order(&mapping->i_pages, index); 968 - void *old; 967 + XA_STATE(xas, &mapping->i_pages, index); 968 + unsigned int nr_pages = 0; 969 + pgoff_t base; 970 + void *entry; 969 971 970 - old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0); 971 - if (old != radswap) 972 - return 0; 973 - free_swap_and_cache_nr(radix_to_swp_entry(radswap), 1 << order); 972 + xas_lock_irq(&xas); 973 + entry = xas_load(&xas); 974 + if (entry == radswap) { 975 + nr_pages = 1 << xas_get_order(&xas); 976 + base = round_down(xas.xa_index, nr_pages); 977 + if (base < index || base + nr_pages - 1 > end) 978 + nr_pages = 0; 979 + else 980 + xas_store(&xas, NULL); 981 + } 982 + xas_unlock_irq(&xas); 974 983 975 - return 1 << order; 984 + if (nr_pages) 985 + free_swap_and_cache_nr(radix_to_swp_entry(radswap), nr_pages); 986 + 987 + return nr_pages; 976 988 } 977 989 978 990 /* ··· 1136 1124 if (xa_is_value(folio)) { 1137 1125 if (unfalloc) 1138 1126 continue; 1139 - nr_swaps_freed += shmem_free_swap(mapping, 1140 - indices[i], folio); 1127 + nr_swaps_freed += shmem_free_swap(mapping, indices[i], 1128 + end - 1, folio); 1141 1129 continue; 1142 1130 } 1143 1131 ··· 1203 1191 folio = fbatch.folios[i]; 1204 1192 1205 1193 if (xa_is_value(folio)) { 1194 + int order; 1206 1195 long swaps_freed; 1207 1196 1208 1197 if (unfalloc) 1209 1198 continue; 1210 - swaps_freed = shmem_free_swap(mapping, indices[i], folio); 1199 + swaps_freed = shmem_free_swap(mapping, indices[i], 1200 + end - 1, folio); 1211 1201 if (!swaps_freed) { 1202 + /* 1203 + * If found a large swap entry cross the end border, 1204 + * skip it as the truncate_inode_partial_folio above 1205 + * should have at least zerod its content once. 
1206 + */ 1207 + order = shmem_confirm_swap(mapping, indices[i], 1208 + radix_to_swp_entry(folio)); 1209 + if (order > 0 && indices[i] + (1 << order) > end) 1210 + continue; 1212 1211 /* Swap was replaced by page: retry */ 1213 1212 index = indices[i]; 1214 1213 break;
+1 -1
mm/swap.h
··· 198 198 void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug); 199 199 200 200 /* linux/mm/swap_state.c */ 201 - extern struct address_space swap_space __ro_after_init; 201 + extern struct address_space swap_space __read_mostly; 202 202 static inline struct address_space *swap_address_space(swp_entry_t entry) 203 203 { 204 204 return &swap_space;
+1 -2
mm/swap_state.c
··· 37 37 #endif 38 38 }; 39 39 40 - /* Set swap_space as read only as swap cache is handled by swap table */ 41 - struct address_space swap_space __ro_after_init = { 40 + struct address_space swap_space __read_mostly = { 42 41 .a_ops = &swap_aops, 43 42 }; 44 43
+2 -5
mm/vmalloc.c
··· 4322 4322 if (want_init_on_free() || want_init_on_alloc(flags)) 4323 4323 memset((void *)p + size, 0, old_size - size); 4324 4324 vm->requested_size = size; 4325 - kasan_poison_vmalloc(p + size, old_size - size); 4325 + kasan_vrealloc(p, old_size, size); 4326 4326 return (void *)p; 4327 4327 } 4328 4328 ··· 4330 4330 * We already have the bytes available in the allocation; use them. 4331 4331 */ 4332 4332 if (size <= alloced_size) { 4333 - kasan_unpoison_vmalloc(p + old_size, size - old_size, 4334 - KASAN_VMALLOC_PROT_NORMAL | 4335 - KASAN_VMALLOC_VM_ALLOC | 4336 - KASAN_VMALLOC_KEEP_TAG); 4337 4333 /* 4338 4334 * No need to zero memory here, as unused memory will have 4339 4335 * already been zeroed at initial allocation time or during 4340 4336 * realloc shrink time. 4341 4337 */ 4342 4338 vm->requested_size = size; 4339 + kasan_vrealloc(p, old_size, size); 4343 4340 return (void *)p; 4344 4341 } 4345 4342