···1101110111021102int __nilfs_mark_inode_dirty(struct inode *inode, int flags)11031103{11041104+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;11041105 struct buffer_head *ibh;11051106 int err;11071107+11081108+ /*11091109+ * Do not dirty inodes after the log writer has been detached11101110+ * and its nilfs_root struct has been freed.11111111+ */11121112+ if (unlikely(nilfs_purging(nilfs)))11131113+ return 0;1106111411071115 err = nilfs_load_inode_block(inode, &ibh);11081116 if (unlikely(err)) {
+7
fs/nilfs2/segment.c
···725725 struct folio *folio = fbatch.folios[i];726726727727 folio_lock(folio);728728+ if (unlikely(folio->mapping != mapping)) {729729+ /* Exclude folios removed from the address space */730730+ folio_unlock(folio);731731+ continue;732732+ }728733 head = folio_buffers(folio);729734 if (!head) {730735 create_empty_buffers(&folio->page, i_blocksize(inode), 0);···28502845 nilfs_segctor_destroy(nilfs->ns_writer);28512846 nilfs->ns_writer = NULL;28522847 }28482848+ set_nilfs_purging(nilfs);2853284928542850 /* Force to free the list of dirty files */28552851 spin_lock(&nilfs->ns_inode_lock);···28632857 up_write(&nilfs->ns_segctor_sem);2864285828652859 nilfs_dispose_list(nilfs, &garbage_list, 1);28602860+ clear_nilfs_purging(nilfs);28662861}
+2
fs/nilfs2/the_nilfs.h
···2929 THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */3030 THE_NILFS_GC_RUNNING, /* gc process is running */3131 THE_NILFS_SB_DIRTY, /* super block is dirty */3232+ THE_NILFS_PURGING, /* disposing dirty files for cleanup */3233};33343435/**···209208THE_NILFS_FNS(DISCONTINUED, discontinued)210209THE_NILFS_FNS(GC_RUNNING, gc_running)211210THE_NILFS_FNS(SB_DIRTY, sb_dirty)211211+THE_NILFS_FNS(PURGING, purging)212212213213/*214214 * Mount option operations
+27-3
fs/proc/kcore.c
···309309310310static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)311311{312312+ struct file *file = iocb->ki_filp;313313+ char *buf = file->private_data;312314 loff_t *fpos = &iocb->ki_pos;313315 size_t phdrs_offset, notes_offset, data_offset;314316 size_t page_offline_frozen = 1;···557555 case KCORE_VMEMMAP:558556 case KCORE_TEXT:559557 /*560560- * We use _copy_to_iter() to bypass usermode hardening561561- * which would otherwise prevent this operation.558558+ * Sadly we must use a bounce buffer here to be able to559559+ * make use of copy_from_kernel_nofault(), as these560560+ * memory regions might not always be mapped on all561561+ * architectures.562562 */563563- if (_copy_to_iter((char *)start, tsz, iter) != tsz) {563563+ if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {564564+ if (iov_iter_zero(tsz, iter) != tsz) {565565+ ret = -EFAULT;566566+ goto out;567567+ }568568+ /*569569+ * We know the bounce buffer is safe to copy from, so570570+ * use _copy_to_iter() directly.571571+ */572572+ } else if (_copy_to_iter(buf, tsz, iter) != tsz) {564573 ret = -EFAULT;565574 goto out;566575 }···608595 if (ret)609596 return ret;610597598598+ filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);599599+ if (!filp->private_data)600600+ return -ENOMEM;601601+611602 if (kcore_need_update)612603 kcore_update_ram();613604 if (i_size_read(inode) != proc_root_kcore->size) {···622605 return 0;623606}624607608608+static int release_kcore(struct inode *inode, struct file *file)609609+{610610+ kfree(file->private_data);611611+ return 0;612612+}613613+625614static const struct proc_ops kcore_proc_ops = {626615 .proc_read_iter = read_kcore_iter,627616 .proc_open = open_kcore,617617+ .proc_release = release_kcore,628618 .proc_lseek = default_llseek,629619};630620
···2525#endif26262727vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);2828-struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,2929- unsigned long addr, pmd_t *pmd,3030- unsigned int flags);3128bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,3229 pmd_t *pmd, unsigned long addr, unsigned long next);3330int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
+15-6
include/linux/mm.h
···34963496 * Indicates whether GUP can follow a PROT_NONE mapped page, or whether34973497 * a (NUMA hinting) fault is required.34983498 */34993499-static inline bool gup_can_follow_protnone(unsigned int flags)34993499+static inline bool gup_can_follow_protnone(struct vm_area_struct *vma,35003500+ unsigned int flags)35003501{35013502 /*35023502- * FOLL_FORCE has to be able to make progress even if the VMA is35033503- * inaccessible. Further, FOLL_FORCE access usually does not represent35043504- * application behaviour and we should avoid triggering NUMA hinting35053505- * faults.35033503+ * If callers don't want to honor NUMA hinting faults, no need to35043504+ * determine if we would actually have to trigger a NUMA hinting fault.35063505 */35073507- return flags & FOLL_FORCE;35063506+ if (!(flags & FOLL_HONOR_NUMA_FAULT))35073507+ return true;35083508+35093509+ /*35103510+ * NUMA hinting faults don't apply in inaccessible (PROT_NONE) VMAs.35113511+ *35123512+ * Requiring a fault here even for inaccessible VMAs would mean that35133513+ * FOLL_FORCE cannot make any progress, because handle_mm_fault()35143514+ * refuses to process NUMA hinting faults in inaccessible VMAs.35153515+ */35163516+ return !vma_is_accessible(vma);35083517}3509351835103519typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
+9
include/linux/mm_types.h
···13561356 FOLL_PCI_P2PDMA = 1 << 10,13571357 /* allow interrupts from generic signals */13581358 FOLL_INTERRUPTIBLE = 1 << 11,13591359+ /*13601360+ * Always honor (trigger) NUMA hinting faults.13611361+ *13621362+ * FOLL_WRITE implicitly honors NUMA hinting faults because a13631363+ * PROT_NONE-mapped page is not writable (exceptions with FOLL_FORCE13641364+ * apply). get_user_pages_fast_only() always implicitly honors NUMA13651365+ * hinting faults.13661366+ */13671367+ FOLL_HONOR_NUMA_FAULT = 1 << 12,1359136813601369 /* See also internal only FOLL flags in mm/internal.h */13611370};
+11
include/linux/pagewalk.h
···6677struct mm_walk;8899+/* Locking requirement during a page walk. */1010+enum page_walk_lock {1111+ /* mmap_lock should be locked for read to stabilize the vma tree */1212+ PGWALK_RDLOCK = 0,1313+ /* vma will be write-locked during the walk */1414+ PGWALK_WRLOCK = 1,1515+ /* vma is expected to be already write-locked during the walk */1616+ PGWALK_WRLOCK_VERIFY = 2,1717+};1818+919/**1020 * struct mm_walk_ops - callbacks for walk_page_range1121 * @pgd_entry: if set, called for each non-empty PGD (top-level) entry···7666 int (*pre_vma)(unsigned long start, unsigned long end,7767 struct mm_walk *walk);7868 void (*post_vma)(struct mm_walk *walk);6969+ enum page_walk_lock walk_lock;7970};80718172/*
···1148114811491149failed:11501150 while (sgtable->nents > sgtable->orig_nents)11511151- put_page(sg_page(&sgtable->sgl[--sgtable->nents]));11511151+ unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));11521152 return res;11531153}11541154
+5-3
mm/compaction.c
···933933934934 /*935935 * Check if the pageblock has already been marked skipped.936936- * Only the aligned PFN is checked as the caller isolates936936+ * Only the first PFN is checked as the caller isolates937937 * COMPACT_CLUSTER_MAX at a time so the second call must938938 * not falsely conclude that the block should be skipped.939939 */940940- if (!valid_page && pageblock_aligned(low_pfn)) {940940+ if (!valid_page && (pageblock_aligned(low_pfn) ||941941+ low_pfn == cc->zone->zone_start_pfn)) {941942 if (!isolation_suitable(cc, page)) {942943 low_pfn = end_pfn;943944 folio = NULL;···20312030 * before making it "skip" so other compaction instances do20322031 * not scan the same block.20332032 */20342034- if (pageblock_aligned(low_pfn) &&20332033+ if ((pageblock_aligned(low_pfn) ||20342034+ low_pfn == cc->zone->zone_start_pfn) &&20352035 !fast_find_block && !isolation_suitable(cc, page))20362036 continue;20372037
···14671467 if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))14681468 return ERR_PTR(-EFAULT);1469146914701470- /* Full NUMA hinting faults to serialise migration in fault paths */14711471- if (pmd_protnone(*pmd) && !gup_can_follow_protnone(flags))14701470+ if (pmd_protnone(*pmd) && !gup_can_follow_protnone(vma, flags))14721471 return NULL;1473147214741473 if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page))
+51-24
mm/hugetlb.c
···15801580 unsigned int order) { }15811581#endif1582158215831583+static inline void __clear_hugetlb_destructor(struct hstate *h,15841584+ struct folio *folio)15851585+{15861586+ lockdep_assert_held(&hugetlb_lock);15871587+15881588+ /*15891589+ * Very subtle15901590+ *15911591+ * For non-gigantic pages set the destructor to the normal compound15921592+ * page dtor. This is needed in case someone takes an additional15931593+ * temporary ref to the page, and freeing is delayed until they drop15941594+ * their reference.15951595+ *15961596+ * For gigantic pages set the destructor to the null dtor. This15971597+ * destructor will never be called. Before freeing the gigantic15981598+ * page destroy_compound_gigantic_folio will turn the folio into a15991599+ * simple group of pages. After this the destructor does not16001600+ * apply.16011601+ *16021602+ */16031603+ if (hstate_is_gigantic(h))16041604+ folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);16051605+ else16061606+ folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);16071607+}16081608+15831609/*15841584- * Remove hugetlb folio from lists, and update dtor so that the folio appears15851585- * as just a compound page.16101610+ * Remove hugetlb folio from lists.16111611+ * If vmemmap exists for the folio, update dtor so that the folio appears16121612+ * as just a compound page. Otherwise, wait until after allocating vmemmap16131613+ * to update dtor.15861614 *15871615 * A reference is held on the folio, except in the case of demote.15881616 *···16411613 }1642161416431615 /*16441644- * Very subtle16451645- *16461646- * For non-gigantic pages set the destructor to the normal compound16471647- * page dtor. This is needed in case someone takes an additional16481648- * temporary ref to the page, and freeing is delayed until they drop16491649- * their reference.16501650- *16511651- * For gigantic pages set the destructor to the null dtor. This16521652- * destructor will never be called. Before freeing the gigantic16531653- * page destroy_compound_gigantic_folio will turn the folio into a16541654- * simple group of pages. After this the destructor does not16551655- * apply.16561656- *16571657- * This handles the case where more than one ref is held when and16581658- * after update_and_free_hugetlb_folio is called.16591659- *16601660- * In the case of demote we do not ref count the page as it will soon16611661- * be turned into a page of smaller size.16161616+ * We can only clear the hugetlb destructor after allocating vmemmap16171617+ * pages. Otherwise, someone (memory error handling) may try to write16181618+ * to tail struct pages.16191619+ */16201620+ if (!folio_test_hugetlb_vmemmap_optimized(folio))16211621+ __clear_hugetlb_destructor(h, folio);16221622+16231623+ /*16241624+ * In the case of demote we do not ref count the page as it will soon16251625+ * be turned into a page of smaller size.16621626 */16631627 if (!demote)16641628 folio_ref_unfreeze(folio, 1);16651665- if (hstate_is_gigantic(h))16661666- folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);16671667- else16681668- folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);1669162916701630 h->nr_huge_pages--;16711631 h->nr_huge_pages_node[nid]--;···17221706{17231707 int i;17241708 struct page *subpage;17091709+ bool clear_dtor = folio_test_hugetlb_vmemmap_optimized(folio);1725171017261711 if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())17271712 return;···17521735 */17531736 if (unlikely(folio_test_hwpoison(folio)))17541737 folio_clear_hugetlb_hwpoison(folio);17381738+17391739+ /*17401740+ * If vmemmap pages were allocated above, then we need to clear the17411741+ * hugetlb destructor under the hugetlb lock.17421742+ */17431743+ if (clear_dtor) {17441744+ spin_lock_irq(&hugetlb_lock);17451745+ __clear_hugetlb_destructor(h, folio);17461746+ spin_unlock_irq(&hugetlb_lock);17471747+ }1755174817561749 for (i = 0; i < pages_per_huge_page(h); i++) {17571750 subpage = folio_page(folio, i);
+17
mm/internal.h
···941941struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);942942int __must_check try_grab_page(struct page *page, unsigned int flags);943943944944+/*945945+ * mm/huge_memory.c946946+ */947947+struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,948948+ unsigned long addr, pmd_t *pmd,949949+ unsigned int flags);950950+944951enum {945952 /* mark page accessed */946953 FOLL_TOUCH = 1 << 16,···10201013 /* Paired with a memory barrier in page_try_share_anon_rmap(). */10211014 if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))10221015 smp_rmb();10161016+10171017+ /*10181018+ * During GUP-fast we might not get called on the head page for a10191019+ * hugetlb page that is mapped using cont-PTE, because GUP-fast does10201020+ * not work with the abstracted hugetlb PTEs that always point at the10211021+ * head page. For hugetlb, PageAnonExclusive only applies on the head10221022+ * page (as it cannot be partially COW-shared), so lookup the head page.10231023+ */10241024+ if (unlikely(!PageHead(page) && PageHuge(page)))10251025+ page = compound_head(page);1023102610241027 /*10251028 * Note that PageKsm() pages cannot be exclusive, and consequently,
+18-9
mm/ksm.c
···462462463463static const struct mm_walk_ops break_ksm_ops = {464464 .pmd_entry = break_ksm_pmd_entry,465465+ .walk_lock = PGWALK_RDLOCK,466466+};467467+468468+static const struct mm_walk_ops break_ksm_lock_vma_ops = {469469+ .pmd_entry = break_ksm_pmd_entry,470470+ .walk_lock = PGWALK_WRLOCK,465471};466472467473/*···483477 * of the process that owns 'vma'. We also do not want to enforce484478 * protection keys here anyway.485479 */486486-static int break_ksm(struct vm_area_struct *vma, unsigned long addr)480480+static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)487481{488482 vm_fault_t ret = 0;483483+ const struct mm_walk_ops *ops = lock_vma ?484484+ &break_ksm_lock_vma_ops : &break_ksm_ops;489485490486 do {491487 int ksm_page;492488493489 cond_resched();494494- ksm_page = walk_page_range_vma(vma, addr, addr + 1,495495- &break_ksm_ops, NULL);490490+ ksm_page = walk_page_range_vma(vma, addr, addr + 1, ops, NULL);496491 if (WARN_ON_ONCE(ksm_page < 0))497492 return ksm_page;498493 if (!ksm_page)···579572 mmap_read_lock(mm);580573 vma = find_mergeable_vma(mm, addr);581574 if (vma)582582- break_ksm(vma, addr);575575+ break_ksm(vma, addr, false);583576 mmap_read_unlock(mm);584577}585578···885878 * in cmp_and_merge_page on one of the rmap_items we would be removing.886879 */887880static int unmerge_ksm_pages(struct vm_area_struct *vma,888888- unsigned long start, unsigned long end)881881+ unsigned long start, unsigned long end, bool lock_vma)889882{890883 unsigned long addr;891884 int err = 0;···896889 if (signal_pending(current))897890 err = -ERESTARTSYS;898891 else899899- err = break_ksm(vma, addr);892892+ err = break_ksm(vma, addr, lock_vma);900893 }901894 return err;902895}···10431036 if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)10441037 continue;10451038 err = unmerge_ksm_pages(vma,10461046- vma->vm_start, vma->vm_end);10391039+ vma->vm_start, vma->vm_end, false);10471040 if (err)10481041 goto error;10491042 }···25532546 return 0;2554254725552548 if (vma->anon_vma) {25562556- err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);25492549+ err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, true);25572550 if (err)25582551 return err;25592552 }···26912684 return 0; /* just ignore the advice */2692268526932686 if (vma->anon_vma) {26942694- err = unmerge_ksm_pages(vma, start, end);26872687+ err = unmerge_ksm_pages(vma, start, end, true);26952688 if (err)26962689 return err;26972690 }···28072800 anon_vma->root == vma->anon_vma->root) {28082801 return page; /* still no need to copy it */28092802 }28032803+ if (PageHWPoison(page))28042804+ return ERR_PTR(-EHWPOISON);28102805 if (!PageUptodate(page))28112806 return page; /* let do_swap_page report the error */28122807
···42844284 static const struct mm_walk_ops mm_walk_ops = {42854285 .test_walk = should_skip_vma,42864286 .p4d_entry = walk_pud_range,42874287+ .walk_lock = PGWALK_RDLOCK,42874288 };4288428942894290 int err;···4856485548574856 spin_lock_irq(&pgdat->memcg_lru.lock);4858485748594859- VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));48584858+ if (hlist_nulls_unhashed(&lruvec->lrugen.list))48594859+ goto unlock;4860486048614861 gen = lruvec->lrugen.gen;4862486248634863- hlist_nulls_del_rcu(&lruvec->lrugen.list);48634863+ hlist_nulls_del_init_rcu(&lruvec->lrugen.list);48644864 pgdat->memcg_lru.nr_memcgs[gen]--;4865486548664866 if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))48674867 WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);48684868-48684868+unlock:48694869 spin_unlock_irq(&pgdat->memcg_lru.lock);48704870 }48714871}···54485446 rcu_read_lock();5449544754505448 hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {54515451- if (op)54495449+ if (op) {54525450 lru_gen_rotate_memcg(lruvec, op);54515451+ op = 0;54525452+ }5453545354545454 mem_cgroup_put(memcg);54555455···54595455 memcg = lruvec_memcg(lruvec);5460545654615457 if (!mem_cgroup_tryget(memcg)) {54625462- op = 0;54585458+ lru_gen_release_memcg(memcg);54635459 memcg = NULL;54645460 continue;54655461 }
+9-5
mm/zsmalloc.c
···1777177717781778static bool zs_page_isolate(struct page *page, isolate_mode_t mode)17791779{17801780+ struct zs_pool *pool;17801781 struct zspage *zspage;1781178217821783 /*···17871786 VM_BUG_ON_PAGE(PageIsolated(page), page);1788178717891788 zspage = get_zspage(page);17901790- migrate_write_lock(zspage);17891789+ pool = zspage->pool;17901790+ spin_lock(&pool->lock);17911791 inc_zspage_isolation(zspage);17921792- migrate_write_unlock(zspage);17921792+ spin_unlock(&pool->lock);1793179317941794 return true;17951795}···18561854 kunmap_atomic(s_addr);1857185518581856 replace_sub_page(class, zspage, newpage, page);18571857+ dec_zspage_isolation(zspage);18591858 /*18601859 * Since we complete the data copy and set up new zspage structure,18611860 * it's okay to release the pool's lock.18621861 */18631862 spin_unlock(&pool->lock);18641864- dec_zspage_isolation(zspage);18651863 migrate_write_unlock(zspage);1866186418671865 get_page(newpage);···1878187618791877static void zs_page_putback(struct page *page)18801878{18791879+ struct zs_pool *pool;18811880 struct zspage *zspage;1882188118831882 VM_BUG_ON_PAGE(!PageIsolated(page), page);1884188318851884 zspage = get_zspage(page);18861886- migrate_write_lock(zspage);18851885+ pool = zspage->pool;18861886+ spin_lock(&pool->lock);18871887 dec_zspage_isolation(zspage);18881888- migrate_write_unlock(zspage);18881888+ spin_unlock(&pool->lock);18891889}1890189018911891static const struct movable_operations zsmalloc_mops = {
+1-1
tools/testing/radix-tree/regression1.c
···177177 nr_threads = 2;178178 pthread_barrier_init(&worker_barrier, NULL, nr_threads);179179180180- threads = malloc(nr_threads * sizeof(pthread_t *));180180+ threads = malloc(nr_threads * sizeof(*threads));181181182182 for (i = 0; i < nr_threads; i++) {183183 arg = i;
+6-2
tools/testing/selftests/cgroup/test_kmem.c
···7070 goto cleanup;71717272 cg_write(cg, "memory.high", "1M");7373+7474+ /* wait for RCU freeing */7575+ sleep(1);7676+7377 slab1 = cg_read_key_long(cg, "memory.stat", "slab ");7474- if (slab1 <= 0)7878+ if (slab1 < 0)7579 goto cleanup;76807781 current = cg_read_long(cg, "memory.current");7878- if (current <= 0)8282+ if (current < 0)7983 goto cleanup;80848185 if (slab1 < slab0 / 2 && current < slab0 / 2)
+7-2
tools/testing/selftests/mm/hmm-tests.c
···57575858#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))5959/* Just the flags we need, copied from mm.h: */6060-#define FOLL_WRITE 0x01 /* check pte is writable */6161-#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite */62606161+#ifndef FOLL_WRITE6262+#define FOLL_WRITE 0x01 /* check pte is writable */6363+#endif6464+6565+#ifndef FOLL_LONGTERM6666+#define FOLL_LONGTERM 0x100 /* mapping lifetime is indefinite */6767+#endif6368FIXTURE(hmm)6469{6570 int fd;