Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'mm-hotfixes-stable-2023-08-11-13-44' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm

Pull misc fixes from Andrew Morton:
"14 hotfixes. 11 of these are cc:stable and the remainder address
post-6.4 issues, or are not considered suitable for -stable
backporting"

* tag 'mm-hotfixes-stable-2023-08-11-13-44' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm:
mm/damon/core: initialize damo_filter->list from damos_new_filter()
nilfs2: fix use-after-free of nilfs_root in dirtying inodes via iput
selftests: cgroup: fix test_kmem_basic false positives
fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
MAINTAINERS: add maple tree mailing list
mm: compaction: fix endless looping over same migrate block
selftests: mm: ksm: fix incorrect evaluation of parameter
hugetlb: do not clear hugetlb dtor until allocating vmemmap
mm: memory-failure: avoid false hwpoison page mapped error info
mm: memory-failure: fix potential unexpected return value from unpoison_memory()
mm/swapfile: fix wrong swap entry type for hwpoisoned swapcache page
radix tree test suite: fix incorrect allocation size for pthreads
crypto, cifs: fix error handling in extract_iter_to_sg()
zsmalloc: fix races between modifications of fullness and isolated

+135 -54
+1
MAINTAINERS
··· 12480 12480 12481 12481 MAPLE TREE 12482 12482 M: Liam R. Howlett <Liam.Howlett@oracle.com> 12483 + L: maple-tree@lists.infradead.org 12483 12484 L: linux-mm@kvack.org 12484 12485 S: Supported 12485 12486 F: Documentation/core-api/maple_tree.rst
+8
fs/nilfs2/inode.c
··· 1101 1101 1102 1102 int __nilfs_mark_inode_dirty(struct inode *inode, int flags) 1103 1103 { 1104 + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; 1104 1105 struct buffer_head *ibh; 1105 1106 int err; 1107 + 1108 + /* 1109 + * Do not dirty inodes after the log writer has been detached 1110 + * and its nilfs_root struct has been freed. 1111 + */ 1112 + if (unlikely(nilfs_purging(nilfs))) 1113 + return 0; 1106 1114 1107 1115 err = nilfs_load_inode_block(inode, &ibh); 1108 1116 if (unlikely(err)) {
+2
fs/nilfs2/segment.c
··· 2845 2845 nilfs_segctor_destroy(nilfs->ns_writer); 2846 2846 nilfs->ns_writer = NULL; 2847 2847 } 2848 + set_nilfs_purging(nilfs); 2848 2849 2849 2850 /* Force to free the list of dirty files */ 2850 2851 spin_lock(&nilfs->ns_inode_lock); ··· 2858 2857 up_write(&nilfs->ns_segctor_sem); 2859 2858 2860 2859 nilfs_dispose_list(nilfs, &garbage_list, 1); 2860 + clear_nilfs_purging(nilfs); 2861 2861 }
+2
fs/nilfs2/the_nilfs.h
··· 29 29 THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */ 30 30 THE_NILFS_GC_RUNNING, /* gc process is running */ 31 31 THE_NILFS_SB_DIRTY, /* super block is dirty */ 32 + THE_NILFS_PURGING, /* disposing dirty files for cleanup */ 32 33 }; 33 34 34 35 /** ··· 209 208 THE_NILFS_FNS(DISCONTINUED, discontinued) 210 209 THE_NILFS_FNS(GC_RUNNING, gc_running) 211 210 THE_NILFS_FNS(SB_DIRTY, sb_dirty) 211 + THE_NILFS_FNS(PURGING, purging) 212 212 213 213 /* 214 214 * Mount option operations
+27 -3
fs/proc/kcore.c
··· 309 309 310 310 static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter) 311 311 { 312 + struct file *file = iocb->ki_filp; 313 + char *buf = file->private_data; 312 314 loff_t *fpos = &iocb->ki_pos; 313 315 size_t phdrs_offset, notes_offset, data_offset; 314 316 size_t page_offline_frozen = 1; ··· 557 555 case KCORE_VMEMMAP: 558 556 case KCORE_TEXT: 559 557 /* 560 - * We use _copy_to_iter() to bypass usermode hardening 561 - * which would otherwise prevent this operation. 558 + * Sadly we must use a bounce buffer here to be able to 559 + * make use of copy_from_kernel_nofault(), as these 560 + * memory regions might not always be mapped on all 561 + * architectures. 562 562 */ 563 - if (_copy_to_iter((char *)start, tsz, iter) != tsz) { 563 + if (copy_from_kernel_nofault(buf, (void *)start, tsz)) { 564 + if (iov_iter_zero(tsz, iter) != tsz) { 565 + ret = -EFAULT; 566 + goto out; 567 + } 568 + /* 569 + * We know the bounce buffer is safe to copy from, so 570 + * use _copy_to_iter() directly. 571 + */ 572 + } else if (_copy_to_iter(buf, tsz, iter) != tsz) { 564 573 ret = -EFAULT; 565 574 goto out; 566 575 } ··· 608 595 if (ret) 609 596 return ret; 610 597 598 + filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL); 599 + if (!filp->private_data) 600 + return -ENOMEM; 601 + 611 602 if (kcore_need_update) 612 603 kcore_update_ram(); 613 604 if (i_size_read(inode) != proc_root_kcore->size) { ··· 622 605 return 0; 623 606 } 624 607 608 + static int release_kcore(struct inode *inode, struct file *file) 609 + { 610 + kfree(file->private_data); 611 + return 0; 612 + } 613 + 625 614 static const struct proc_ops kcore_proc_ops = { 626 615 .proc_read_iter = read_kcore_iter, 627 616 .proc_open = open_kcore, 617 + .proc_release = release_kcore, 628 618 .proc_lseek = default_llseek, 629 619 }; 630 620
+1 -1
lib/scatterlist.c
··· 1148 1148 1149 1149 failed: 1150 1150 while (sgtable->nents > sgtable->orig_nents) 1151 - put_page(sg_page(&sgtable->sgl[--sgtable->nents])); 1151 + unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents])); 1152 1152 return res; 1153 1153 } 1154 1154
+5 -3
mm/compaction.c
··· 912 912 913 913 /* 914 914 * Check if the pageblock has already been marked skipped. 915 - * Only the aligned PFN is checked as the caller isolates 915 + * Only the first PFN is checked as the caller isolates 916 916 * COMPACT_CLUSTER_MAX at a time so the second call must 917 917 * not falsely conclude that the block should be skipped. 918 918 */ 919 - if (!valid_page && pageblock_aligned(low_pfn)) { 919 + if (!valid_page && (pageblock_aligned(low_pfn) || 920 + low_pfn == cc->zone->zone_start_pfn)) { 920 921 if (!isolation_suitable(cc, page)) { 921 922 low_pfn = end_pfn; 922 923 folio = NULL; ··· 2003 2002 * before making it "skip" so other compaction instances do 2004 2003 * not scan the same block. 2005 2004 */ 2006 - if (pageblock_aligned(low_pfn) && 2005 + if ((pageblock_aligned(low_pfn) || 2006 + low_pfn == cc->zone->zone_start_pfn) && 2007 2007 !fast_find_block && !isolation_suitable(cc, page)) 2008 2008 continue; 2009 2009
+1
mm/damon/core.c
··· 273 273 return NULL; 274 274 filter->type = type; 275 275 filter->matching = matching; 276 + INIT_LIST_HEAD(&filter->list); 276 277 return filter; 277 278 } 278 279
+51 -24
mm/hugetlb.c
··· 1579 1579 unsigned int order) { } 1580 1580 #endif 1581 1581 1582 + static inline void __clear_hugetlb_destructor(struct hstate *h, 1583 + struct folio *folio) 1584 + { 1585 + lockdep_assert_held(&hugetlb_lock); 1586 + 1587 + /* 1588 + * Very subtle 1589 + * 1590 + * For non-gigantic pages set the destructor to the normal compound 1591 + * page dtor. This is needed in case someone takes an additional 1592 + * temporary ref to the page, and freeing is delayed until they drop 1593 + * their reference. 1594 + * 1595 + * For gigantic pages set the destructor to the null dtor. This 1596 + * destructor will never be called. Before freeing the gigantic 1597 + * page destroy_compound_gigantic_folio will turn the folio into a 1598 + * simple group of pages. After this the destructor does not 1599 + * apply. 1600 + * 1601 + */ 1602 + if (hstate_is_gigantic(h)) 1603 + folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR); 1604 + else 1605 + folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR); 1606 + } 1607 + 1582 1608 /* 1583 - * Remove hugetlb folio from lists, and update dtor so that the folio appears 1584 - * as just a compound page. 1609 + * Remove hugetlb folio from lists. 1610 + * If vmemmap exists for the folio, update dtor so that the folio appears 1611 + * as just a compound page. Otherwise, wait until after allocating vmemmap 1612 + * to update dtor. 1585 1613 * 1586 1614 * A reference is held on the folio, except in the case of demote. 1587 1615 * ··· 1640 1612 } 1641 1613 1642 1614 /* 1643 - * Very subtle 1644 - * 1645 - * For non-gigantic pages set the destructor to the normal compound 1646 - * page dtor. This is needed in case someone takes an additional 1647 - * temporary ref to the page, and freeing is delayed until they drop 1648 - * their reference. 1649 - * 1650 - * For gigantic pages set the destructor to the null dtor. This 1651 - * destructor will never be called. Before freeing the gigantic 1652 - * page destroy_compound_gigantic_folio will turn the folio into a 1653 - * simple group of pages. After this the destructor does not 1654 - * apply. 1655 - * 1656 - * This handles the case where more than one ref is held when and 1657 - * after update_and_free_hugetlb_folio is called. 1658 - * 1659 - * In the case of demote we do not ref count the page as it will soon 1660 - * be turned into a page of smaller size. 1615 + * We can only clear the hugetlb destructor after allocating vmemmap 1616 + * pages. Otherwise, someone (memory error handling) may try to write 1617 + * to tail struct pages. 1618 + */ 1619 + if (!folio_test_hugetlb_vmemmap_optimized(folio)) 1620 + __clear_hugetlb_destructor(h, folio); 1621 + 1622 + /* 1623 + * In the case of demote we do not ref count the page as it will soon 1624 + * be turned into a page of smaller size. 1661 1625 */ 1662 1626 if (!demote) 1663 1627 folio_ref_unfreeze(folio, 1); 1664 - if (hstate_is_gigantic(h)) 1665 - folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR); 1666 - else 1667 - folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR); 1668 1628 1669 1629 h->nr_huge_pages--; 1670 1630 h->nr_huge_pages_node[nid]--; ··· 1721 1705 { 1722 1706 int i; 1723 1707 struct page *subpage; 1708 + bool clear_dtor = folio_test_hugetlb_vmemmap_optimized(folio); 1724 1709 1725 1710 if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) 1726 1711 return; ··· 1751 1734 */ 1752 1735 if (unlikely(folio_test_hwpoison(folio))) 1753 1736 folio_clear_hugetlb_hwpoison(folio); 1737 + 1738 + /* 1739 + * If vmemmap pages were allocated above, then we need to clear the 1740 + * hugetlb destructor under the hugetlb lock. 1741 + */ 1742 + if (clear_dtor) { 1743 + spin_lock_irq(&hugetlb_lock); 1744 + __clear_hugetlb_destructor(h, folio); 1745 + spin_unlock_irq(&hugetlb_lock); 1746 + } 1754 1747 1755 1748 for (i = 0; i < pages_per_huge_page(h); i++) { 1756 1749 subpage = folio_page(folio, i);
+2
mm/ksm.c
··· 2784 2784 anon_vma->root == vma->anon_vma->root) { 2785 2785 return page; /* still no need to copy it */ 2786 2786 } 2787 + if (PageHWPoison(page)) 2788 + return ERR_PTR(-EHWPOISON); 2787 2789 if (!PageUptodate(page)) 2788 2790 return page; /* let do_swap_page report the error */ 2789 2791
+16 -13
mm/memory-failure.c
··· 2466 2466 { 2467 2467 struct folio *folio; 2468 2468 struct page *p; 2469 - int ret = -EBUSY; 2469 + int ret = -EBUSY, ghp; 2470 2470 unsigned long count = 1; 2471 2471 bool huge = false; 2472 2472 static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL, ··· 2499 2499 goto unlock_mutex; 2500 2500 } 2501 2501 2502 + if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio)) 2503 + goto unlock_mutex; 2504 + 2505 + /* 2506 + * Note that folio->_mapcount is overloaded in SLAB, so the simple test 2507 + * in folio_mapped() has to be done after folio_test_slab() is checked. 2508 + */ 2502 2509 if (folio_mapped(folio)) { 2503 2510 unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n", 2504 2511 pfn, &unpoison_rs); ··· 2518 2511 goto unlock_mutex; 2519 2512 } 2520 2513 2521 - if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio)) 2522 - goto unlock_mutex; 2523 - 2524 - ret = get_hwpoison_page(p, MF_UNPOISON); 2525 - if (!ret) { 2514 + ghp = get_hwpoison_page(p, MF_UNPOISON); 2515 + if (!ghp) { 2526 2516 if (PageHuge(p)) { 2527 2517 huge = true; 2528 2518 count = folio_free_raw_hwp(folio, false); 2529 - if (count == 0) { 2530 - ret = -EBUSY; 2519 + if (count == 0) 2531 2520 goto unlock_mutex; 2532 - } 2533 2521 } 2534 2522 ret = folio_test_clear_hwpoison(folio) ? 0 : -EBUSY; 2535 - } else if (ret < 0) { 2536 - if (ret == -EHWPOISON) { 2523 + } else if (ghp < 0) { 2524 + if (ghp == -EHWPOISON) { 2537 2525 ret = put_page_back_buddy(p) ? 0 : -EBUSY; 2538 - } else 2526 + } else { 2527 + ret = ghp; 2539 2528 unpoison_pr_info("Unpoison: failed to grab page %#lx\n", 2540 2529 pfn, &unpoison_rs); 2530 + } 2541 2531 } else { 2542 2532 if (PageHuge(p)) { 2543 2533 huge = true; 2544 2534 count = folio_free_raw_hwp(folio, false); 2545 2535 if (count == 0) { 2546 - ret = -EBUSY; 2547 2536 folio_put(folio); 2548 2537 goto unlock_mutex; 2549 2538 }
+4 -4
mm/swapfile.c
··· 1746 1746 struct page *swapcache; 1747 1747 spinlock_t *ptl; 1748 1748 pte_t *pte, new_pte, old_pte; 1749 - bool hwposioned = false; 1749 + bool hwpoisoned = PageHWPoison(page); 1750 1750 int ret = 1; 1751 1751 1752 1752 swapcache = page; ··· 1754 1754 if (unlikely(!page)) 1755 1755 return -ENOMEM; 1756 1756 else if (unlikely(PTR_ERR(page) == -EHWPOISON)) 1757 - hwposioned = true; 1757 + hwpoisoned = true; 1758 1758 1759 1759 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); 1760 1760 if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte), ··· 1765 1765 1766 1766 old_pte = ptep_get(pte); 1767 1767 1768 - if (unlikely(hwposioned || !PageUptodate(page))) { 1768 + if (unlikely(hwpoisoned || !PageUptodate(page))) { 1769 1769 swp_entry_t swp_entry; 1770 1770 1771 1771 dec_mm_counter(vma->vm_mm, MM_SWAPENTS); 1772 - if (hwposioned) { 1772 + if (hwpoisoned) { 1773 1773 swp_entry = make_hwpoison_entry(swapcache); 1774 1774 page = swapcache; 1775 1775 } else {
+9 -5
mm/zsmalloc.c
··· 1798 1798 1799 1799 static bool zs_page_isolate(struct page *page, isolate_mode_t mode) 1800 1800 { 1801 + struct zs_pool *pool; 1801 1802 struct zspage *zspage; 1802 1803 1803 1804 /* ··· 1808 1807 VM_BUG_ON_PAGE(PageIsolated(page), page); 1809 1808 1810 1809 zspage = get_zspage(page); 1811 - migrate_write_lock(zspage); 1810 + pool = zspage->pool; 1811 + spin_lock(&pool->lock); 1812 1812 inc_zspage_isolation(zspage); 1813 - migrate_write_unlock(zspage); 1813 + spin_unlock(&pool->lock); 1814 1814 1815 1815 return true; 1816 1816 } ··· 1877 1875 kunmap_atomic(s_addr); 1878 1876 1879 1877 replace_sub_page(class, zspage, newpage, page); 1878 + dec_zspage_isolation(zspage); 1880 1879 /* 1881 1880 * Since we complete the data copy and set up new zspage structure, 1882 1881 * it's okay to release the pool's lock. 1883 1882 */ 1884 1883 spin_unlock(&pool->lock); 1885 - dec_zspage_isolation(zspage); 1886 1884 migrate_write_unlock(zspage); 1887 1885 1888 1886 get_page(newpage); ··· 1899 1897 1900 1898 static void zs_page_putback(struct page *page) 1901 1899 { 1900 + struct zs_pool *pool; 1902 1901 struct zspage *zspage; 1903 1902 1904 1903 VM_BUG_ON_PAGE(!PageIsolated(page), page); 1905 1904 1906 1905 zspage = get_zspage(page); 1907 - migrate_write_lock(zspage); 1906 + pool = zspage->pool; 1907 + spin_lock(&pool->lock); 1908 1908 dec_zspage_isolation(zspage); 1909 - migrate_write_unlock(zspage); 1909 + spin_unlock(&pool->lock); 1910 1910 } 1911 1911 1912 1912 static const struct movable_operations zsmalloc_mops = {
+1 -1
tools/testing/radix-tree/regression1.c
··· 177 177 nr_threads = 2; 178 178 pthread_barrier_init(&worker_barrier, NULL, nr_threads); 179 179 180 - threads = malloc(nr_threads * sizeof(pthread_t *)); 180 + threads = malloc(nr_threads * sizeof(*threads)); 181 181 182 182 for (i = 0; i < nr_threads; i++) { 183 183 arg = i;
+4
tools/testing/selftests/cgroup/test_kmem.c
··· 70 70 goto cleanup; 71 71 72 72 cg_write(cg, "memory.high", "1M"); 73 + 74 + /* wait for RCU freeing */ 75 + sleep(1); 76 + 73 77 slab1 = cg_read_key_long(cg, "memory.stat", "slab "); 74 78 if (slab1 <= 0) 75 79 goto cleanup;
+1
tools/testing/selftests/mm/ksm_tests.c
··· 831 831 printf("Size must be greater than 0\n"); 832 832 return KSFT_FAIL; 833 833 } 834 + break; 834 835 case 't': 835 836 { 836 837 int tmp = atoi(optarg);