Merge tag 'mm-hotfixes-stable-2022-06-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull hotfixes from Andrew Morton:
"Minor things, mainly - mailmap updates, MAINTAINERS updates, etc.

Fixes for this merge window:

- fix for a damon boot hang, from SeongJae

- fix for a kfence warning splat, from Jason Donenfeld

- fix for zero-pfn pinning, from Alex Williamson

- fix for fallocate hole punch clearing, from Mike Kravetz

Fixes for previous releases:

- fix for a performance regression, from Marcelo

 - fix for a hwpoisoning BUG, from zhenwei pi"

* tag 'mm-hotfixes-stable-2022-06-26' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
mailmap: add entry for Christian Marangi
mm/memory-failure: disable unpoison once hw error happens
hugetlbfs: zero partial pages during fallocate hole punch
mm: memcontrol: reference to tools/cgroup/memcg_slabinfo.py
mm: re-allow pinning of zero pfns
mm/kfence: select random number before taking raw lock
MAINTAINERS: add maillist information for LoongArch
MAINTAINERS: update MM tree references
MAINTAINERS: update Abel Vesa's email
MAINTAINERS: add MEMORY HOT(UN)PLUG section and add David as reviewer
MAINTAINERS: add Miaohe Lin as a memory-failure reviewer
mailmap: add alias for jarkko@profian.com
mm/damon/reclaim: schedule 'damon_reclaim_timer' only after 'system_wq' is initialized
kthread: make it clear that kthread_create_on_node() might be terminated by any fatal signal
mm: lru_cache_disable: use synchronize_rcu_expedited
mm/page_isolation.c: fix one kernel-doc comment

Changed files: +119 -37
.mailmap (+4)

···
 # Please keep this list dictionary sorted.
 #
 Aaron Durbin <adurbin@google.com>
+Abel Vesa <abelvesa@kernel.org> <abel.vesa@nxp.com>
+Abel Vesa <abelvesa@kernel.org> <abelvesa@gmail.com>
 Abhinav Kumar <quic_abhinavk@quicinc.com> <abhinavk@codeaurora.org>
 Adam Oldham <oldhamca@gmail.com>
 Adam Radford <aradford@gmail.com>
···
 Christian Brauner <brauner@kernel.org> <christian@brauner.io>
 Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com>
 Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
+Christian Marangi <ansuelsmth@gmail.com>
 Christophe Ricard <christophe.ricard@gmail.com>
 Christoph Hellwig <hch@lst.de>
 Colin Ian King <colin.king@intel.com> <colin.king@canonical.com>
···
 Jan Glauber <jan.glauber@gmail.com> <jang@linux.vnet.ibm.com>
 Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
 Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
+Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
 Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
 Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
 Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
Documentation/vm/hwpoison.rst (+2 -1)

···
 unpoison-pfn
 	Software-unpoison page at PFN echoed into this file. This way
 	a page can be reused again. This only works for Linux
-	injected failures, not for real memory failures.
+	injected failures, not for real memory failures. Once any hardware
+	memory failure happens, this feature is disabled.

 Note these injection interfaces are not stable and might change between
 kernel versions
MAINTAINERS (+17 -4)

···
 HWPOISON MEMORY FAILURE HANDLING
 M:	Naoya Horiguchi <naoya.horiguchi@nec.com>
+R:	Miaohe Lin <linmiaohe@huawei.com>
 L:	linux-mm@kvack.org
 S:	Maintained
 F:	mm/hwpoison-inject.c
···
 LOONGARCH
 M:	Huacai Chen <chenhuacai@kernel.org>
 R:	WANG Xuerui <kernel@xen0n.name>
+L:	loongarch@lists.linux.dev
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git
 F:	arch/loongarch/
···
 L:	linux-mm@kvack.org
 S:	Maintained
 W:	http://www.linux-mm.org
-T:	quilt https://ozlabs.org/~akpm/mmotm/
-T:	quilt https://ozlabs.org/~akpm/mmots/
-T:	git git://github.com/hnaz/linux-mm.git
+T:	git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+T:	quilt git://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new
 F:	include/linux/gfp.h
 F:	include/linux/memory_hotplug.h
 F:	include/linux/mm.h
···
 F:	include/linux/vmalloc.h
 F:	mm/
 F:	tools/testing/selftests/vm/
+
+MEMORY HOT(UN)PLUG
+M:	David Hildenbrand <david@redhat.com>
+M:	Oscar Salvador <osalvador@suse.de>
+L:	linux-mm@kvack.org
+S:	Maintained
+F:	Documentation/admin-guide/mm/memory-hotplug.rst
+F:	Documentation/core-api/memory-hotplug.rst
+F:	drivers/base/memory.c
+F:	include/linux/memory_hotplug.h
+F:	mm/memory_hotplug.c
+F:	tools/testing/selftests/memory-hotplug/

 MEMORY TECHNOLOGY DEVICES (MTD)
 M:	Miquel Raynal <miquel.raynal@bootlin.com>
···
 F:	drivers/iio/gyro/fxas21002c_spi.c

 NXP i.MX CLOCK DRIVERS
-M:	Abel Vesa <abel.vesa@nxp.com>
+M:	Abel Vesa <abelvesa@kernel.org>
 L:	linux-clk@vger.kernel.org
 L:	linux-imx@nxp.com
 S:	Maintained
drivers/base/memory.c (+1 -1)

···
 	if (kstrtoull(buf, 0, &pfn) < 0)
 		return -EINVAL;
 	pfn >>= PAGE_SHIFT;
-	ret = memory_failure(pfn, 0);
+	ret = memory_failure(pfn, MF_SW_SIMULATED);
 	if (ret == -EOPNOTSUPP)
 		ret = 0;
 	return ret ? ret : count;
fs/hugetlbfs/inode.c (+55 -17)

···
 	remove_inode_hugepages(inode, offset, LLONG_MAX);
 }

+static void hugetlbfs_zero_partial_page(struct hstate *h,
+					struct address_space *mapping,
+					loff_t start,
+					loff_t end)
+{
+	pgoff_t idx = start >> huge_page_shift(h);
+	struct folio *folio;
+
+	folio = filemap_lock_folio(mapping, idx);
+	if (!folio)
+		return;
+
+	start = start & ~huge_page_mask(h);
+	end = end & ~huge_page_mask(h);
+	if (!end)
+		end = huge_page_size(h);
+
+	folio_zero_segment(folio, (size_t)start, (size_t)end);
+
+	folio_unlock(folio);
+	folio_put(folio);
+}
+
 static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 {
+	struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
+	struct address_space *mapping = inode->i_mapping;
 	struct hstate *h = hstate_inode(inode);
 	loff_t hpage_size = huge_page_size(h);
 	loff_t hole_start, hole_end;

 	/*
-	 * For hole punch round up the beginning offset of the hole and
-	 * round down the end.
+	 * hole_start and hole_end indicate the full pages within the hole.
 	 */
 	hole_start = round_up(offset, hpage_size);
 	hole_end = round_down(offset + len, hpage_size);

+	inode_lock(inode);
+
+	/* protected by i_rwsem */
+	if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
+		inode_unlock(inode);
+		return -EPERM;
+	}
+
+	i_mmap_lock_write(mapping);
+
+	/* If range starts before first full page, zero partial page. */
+	if (offset < hole_start)
+		hugetlbfs_zero_partial_page(h, mapping,
+				offset, min(offset + len, hole_start));
+
+	/* Unmap users of full pages in the hole. */
 	if (hole_end > hole_start) {
-		struct address_space *mapping = inode->i_mapping;
-		struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
-
-		inode_lock(inode);
-
-		/* protected by i_rwsem */
-		if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
-			inode_unlock(inode);
-			return -EPERM;
-		}
-
-		i_mmap_lock_write(mapping);
 		if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
 			hugetlb_vmdelete_list(&mapping->i_mmap,
 					      hole_start >> PAGE_SHIFT,
 					      hole_end >> PAGE_SHIFT, 0);
-		i_mmap_unlock_write(mapping);
-		remove_inode_hugepages(inode, hole_start, hole_end);
-		inode_unlock(inode);
 	}
+
+	/* If range extends beyond last full page, zero partial page. */
+	if ((offset + len) > hole_end && (offset + len) > hole_start)
+		hugetlbfs_zero_partial_page(h, mapping,
+				hole_end, offset + len);
+
+	i_mmap_unlock_write(mapping);
+
+	/* Remove full pages from the file. */
+	if (hole_end > hole_start)
+		remove_inode_hugepages(inode, hole_start, hole_end);
+
+	inode_unlock(inode);

 	return 0;
 }
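The interesting part of the hugetlbfs change is the split of the punched range into zeroed partial pages and removed full pages. Below is a minimal userspace sketch, an illustration only rather than kernel code: it assumes a hypothetical 2 MiB huge page size and reimplements round_up/round_down/min as local macros, but follows the same arithmetic as the hunk above. For a punch starting at 1 MiB and spanning 4 MiB it prints a zeroed head [1 MiB, 2 MiB), a removed middle [2 MiB, 4 MiB) and a zeroed tail [4 MiB, 5 MiB).

/*
 * Userspace illustration of the hole-punch range arithmetic above.
 * Assumes a hypothetical 2 MiB huge page size; build with: cc -o holes holes.c
 */
#include <stdio.h>

#define HPAGE_SIZE (2UL << 20)                  /* assumed huge page size */
#define ROUND_UP(x, a)   ((((x) + (a) - 1) / (a)) * (a))
#define ROUND_DOWN(x, a) (((x) / (a)) * (a))
#define MIN(a, b)        ((a) < (b) ? (a) : (b))

int main(void)
{
	unsigned long offset = 1UL << 20;       /* punch from 1 MiB ...  */
	unsigned long len    = 4UL << 20;       /* ... for 4 MiB         */
	unsigned long end    = offset + len;

	unsigned long hole_start = ROUND_UP(offset, HPAGE_SIZE);
	unsigned long hole_end   = ROUND_DOWN(end, HPAGE_SIZE);

	/* Partial page before the first full page: zeroed, not removed. */
	if (offset < hole_start)
		printf("zero   [%lu, %lu)\n", offset, MIN(end, hole_start));

	/* Full pages inside the hole: unmapped and removed from the file. */
	if (hole_end > hole_start)
		printf("remove [%lu, %lu)\n", hole_start, hole_end);

	/* Partial page after the last full page: zeroed, not removed. */
	if (end > hole_end && end > hole_start)
		printf("zero   [%lu, %lu)\n", hole_end, end);

	return 0;
}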
include/linux/mm.h (+2 -1)

···
 	if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
 		return false;
 #endif
-	return !(is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page)));
+	return !is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page));
 }
 #else
 static inline bool is_pinnable_page(struct page *page)
···
 	MF_MUST_KILL = 1 << 2,
 	MF_SOFT_OFFLINE = 1 << 3,
 	MF_UNPOISON = 1 << 4,
+	MF_SW_SIMULATED = 1 << 5,
 };
 extern int memory_failure(unsigned long pfn, int flags);
 extern void memory_failure_queue(unsigned long pfn, int flags);
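The is_pinnable_page() hunk is easy to misread. The following userspace truth table, an illustration only, uses the booleans movable and zero_pfn as stand-ins for is_zone_movable_page() and is_zero_pfn(page_to_pfn(page)); it shows that the old expression rejected the zero pfn, while the new one always reports it as pinnable and otherwise keeps the movable-page check.

/*
 * Userspace sketch of the boolean change in is_pinnable_page() above.
 * "movable" and "zero_pfn" stand in for the two kernel predicates.
 */
#include <stdbool.h>
#include <stdio.h>

static bool old_pinnable(bool movable, bool zero_pfn)
{
	return !(movable || zero_pfn);
}

static bool new_pinnable(bool movable, bool zero_pfn)
{
	return !movable || zero_pfn;
}

int main(void)
{
	printf("movable zero_pfn  old  new\n");
	for (int movable = 0; movable <= 1; movable++)
		for (int zero_pfn = 0; zero_pfn <= 1; zero_pfn++)
			printf("   %d       %d       %d    %d\n",
			       movable, zero_pfn,
			       old_pinnable(movable, zero_pfn),
			       new_pinnable(movable, zero_pfn));
	return 0;
}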
kernel/kthread.c (+7 -7)

···

 	self = to_kthread(current);

-	/* If user was SIGKILLed, I release the structure. */
+	/* Release the structure when caller killed by a fatal signal. */
 	done = xchg(&create->done, NULL);
 	if (!done) {
 		kfree(create);
···
 	/* We want our own signal handler (we take no signals by default). */
 	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
 	if (pid < 0) {
-		/* If user was SIGKILLed, I release the structure. */
+		/* Release the structure when caller killed by a fatal signal. */
 		struct completion *done = xchg(&create->done, NULL);

 		if (!done) {
···
 	 */
 	if (unlikely(wait_for_completion_killable(&done))) {
 		/*
-		 * If I was SIGKILLed before kthreadd (or new kernel thread)
-		 * calls complete(), leave the cleanup of this structure to
-		 * that thread.
+		 * If I was killed by a fatal signal before kthreadd (or new
+		 * kernel thread) calls complete(), leave the cleanup of this
+		 * structure to that thread.
 		 */
 		if (xchg(&create->done, NULL))
 			return ERR_PTR(-EINTR);
···
 *
 * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
 * when the needed structures could not get allocated, and ERR_PTR(-EINTR)
- * when the worker was SIGKILLed.
+ * when the caller was killed by a fatal signal.
 */
 struct kthread_worker *
 kthread_create_worker(unsigned int flags, const char namefmt[], ...)
···
 * Return:
 * The pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
 * when the needed structures could not get allocated, and ERR_PTR(-EINTR)
- * when the worker was SIGKILLed.
+ * when the caller was killed by a fatal signal.
 */
 struct kthread_worker *
 kthread_create_worker_on_cpu(int cpu, unsigned int flags,
mm/damon/reclaim.c (+8)

···
 }
 static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn);

+static bool damon_reclaim_initialized;
+
 static int enabled_store(const char *val,
 		const struct kernel_param *kp)
 {
 	int rc = param_set_bool(val, kp);

 	if (rc < 0)
+		return rc;
+
+	/* system_wq might not initialized yet */
+	if (!damon_reclaim_initialized)
 		return rc;

 	if (enabled)
···
 	damon_add_target(ctx, target);

 	schedule_delayed_work(&damon_reclaim_timer, 0);
+
+	damon_reclaim_initialized = true;
 	return 0;
 }
mm/hwpoison-inject.c (+1 -1)

···

 inject:
 	pr_info("Injecting memory failure at pfn %#lx\n", pfn);
-	err = memory_failure(pfn, 0);
+	err = memory_failure(pfn, MF_SW_SIMULATED);
 	return (err == -EOPNOTSUPP) ? 0 : err;
 }
mm/kfence/core.c (+5 -2)

···
 	unsigned long flags;
 	struct slab *slab;
 	void *addr;
+	const bool random_right_allocate = prandom_u32_max(2);
+	const bool random_fault = CONFIG_KFENCE_STRESS_TEST_FAULTS &&
+				  !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS);

 	/* Try to obtain a free object. */
 	raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
···
 	 * is that the out-of-bounds accesses detected are deterministic for
 	 * such allocations.
 	 */
-	if (prandom_u32_max(2)) {
+	if (random_right_allocate) {
 		/* Allocate on the "right" side, re-calculate address. */
 		meta->addr += PAGE_SIZE - size;
 		meta->addr = ALIGN_DOWN(meta->addr, cache->align);
···
 	if (cache->ctor)
 		cache->ctor(addr);

-	if (CONFIG_KFENCE_STRESS_TEST_FAULTS && !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS))
+	if (random_fault)
 		kfence_protect(meta->addr);	/* Random "faults" by protecting the object. */

 	atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCATED]);
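The kfence hunk applies a general pattern: make any randomized decisions before entering the raw-locked section, so the random number generator's own locking never nests under the raw spinlock. A tiny userspace sketch of the same shape is shown below; it is an illustration only, using pthread and rand() in place of kfence_freelist_lock and prandom_u32_max().

/*
 * Userspace sketch of "decide randomness before taking the lock".
 * Build with: cc -pthread -o sketch sketch.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t freelist_lock = PTHREAD_MUTEX_INITIALIZER;

static void alloc_object(void)
{
	/* Random decision made up front, outside the critical section. */
	const bool allocate_right = rand() % 2;

	pthread_mutex_lock(&freelist_lock);
	/* ... take an object off the freelist ... */
	pthread_mutex_unlock(&freelist_lock);

	if (allocate_right)
		printf("allocate on the right side of the page\n");
	else
		printf("allocate on the left side of the page\n");
}

int main(void)
{
	srand(0);
	alloc_object();
	return 0;
}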
mm/madvise.c (+1 -1)

···
 	} else {
 		pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n",
 			 pfn, start);
-		ret = memory_failure(pfn, MF_COUNT_INCREASED);
+		ret = memory_failure(pfn, MF_COUNT_INCREASED | MF_SW_SIMULATED);
 		if (ret == -EOPNOTSUPP)
 			ret = 0;
 	}
mm/memcontrol.c (+1 -1)

···
 {
 	/*
 	 * Deprecated.
-	 * Please, take a look at tools/cgroup/slabinfo.py .
+	 * Please, take a look at tools/cgroup/memcg_slabinfo.py .
 	 */
 	return 0;
 }
mm/memory-failure.c (+12)

···

 atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);

+static bool hw_memory_failure __read_mostly = false;
+
 static bool __page_handle_poison(struct page *page)
 {
 	int ret;
···

 	mutex_lock(&mf_mutex);

+	if (!(flags & MF_SW_SIMULATED))
+		hw_memory_failure = true;
+
 	p = pfn_to_online_page(pfn);
 	if (!p) {
 		res = arch_memory_failure(pfn, flags);
···
 	page = compound_head(p);

 	mutex_lock(&mf_mutex);
+
+	if (hw_memory_failure) {
+		unpoison_pr_info("Unpoison: Disabled after HW memory failure %#lx\n",
+				 pfn, &unpoison_rs);
+		ret = -EOPNOTSUPP;
+		goto unlock_mutex;
+	}

 	if (!PageHWPoison(p)) {
 		unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
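To make the new unpoison gating easier to follow, here is a small userspace model of the control flow added above. It is an illustration only; memory_failure_model() and unpoison_model() are hypothetical stand-ins for memory_failure() and unpoison_memory(). Software-injected failures pass MF_SW_SIMULATED and remain unpoisonable, but the first real failure latches hw_memory_failure and every later unpoison attempt returns -EOPNOTSUPP.

/*
 * Userspace model of the "disable unpoison after a real HW error" gate.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define MF_SW_SIMULATED (1 << 5)

static bool hw_memory_failure;

static int memory_failure_model(unsigned long pfn, int flags)
{
	if (!(flags & MF_SW_SIMULATED))
		hw_memory_failure = true;    /* a real hardware error happened */
	printf("poisoned pfn %#lx\n", pfn);
	return 0;
}

static int unpoison_model(unsigned long pfn)
{
	if (hw_memory_failure) {
		printf("Unpoison: Disabled after HW memory failure %#lx\n", pfn);
		return -EOPNOTSUPP;
	}
	printf("unpoisoned pfn %#lx\n", pfn);
	return 0;
}

int main(void)
{
	memory_failure_model(0x1000, MF_SW_SIMULATED); /* injected: still unpoisonable */
	unpoison_model(0x1000);

	memory_failure_model(0x2000, 0);               /* real error: disables unpoison */
	unpoison_model(0x1000);
	return 0;
}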
mm/page_isolation.c (+2)

···
 * @flags:			isolation flags
 * @gfp_flags:			GFP flags used for migrating pages
 * @isolate_before:	isolate the pageblock before the boundary_pfn
+ * @skip_isolation:	the flag to skip the pageblock isolation in second
+ *			isolate_single_pageblock()
 *
 * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
 * pageblock. When not all pageblocks within a page are isolated at the same
mm/swap.c (+1 -1)

···
 	 * lru_disable_count = 0 will have exited the critical
 	 * section when synchronize_rcu() returns.
 	 */
-	synchronize_rcu();
+	synchronize_rcu_expedited();
 #ifdef CONFIG_SMP
 	__lru_add_drain_all(true);
 #else