Merge branch 'mm-hotfixes-stable' into mm-stable to pick up "mm: fix
crashes from deferred split racing folio migration", needed by "mm:
migrate: split folio_migrate_mapping()".
···384384Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>385385Lior David <quic_liord@quicinc.com> <liord@codeaurora.org>386386Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com>387387+Lorenzo Stoakes <lorenzo.stoakes@oracle.com> <lstoakes@gmail.com>387388Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net>388389Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>389390Luo Jie <quic_luoj@quicinc.com> <luoj@codeaurora.org>
+1-1
MAINTAINERS
···1447614476M: Andrew Morton <akpm@linux-foundation.org>1447714477R: Liam R. Howlett <Liam.Howlett@oracle.com>1447814478R: Vlastimil Babka <vbabka@suse.cz>1447914479-R: Lorenzo Stoakes <lstoakes@gmail.com>1447914479+R: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>1448014480L: linux-mm@kvack.org1448114481S: Maintained1448214482W: http://www.linux-mm.org
+30-2
fs/nilfs2/dir.c
···383383384384struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop)385385{386386- struct nilfs_dir_entry *de = nilfs_get_folio(dir, 0, foliop);386386+ struct folio *folio;387387+ struct nilfs_dir_entry *de, *next_de;388388+ size_t limit;389389+ char *msg;387390391391+ de = nilfs_get_folio(dir, 0, &folio);388392 if (IS_ERR(de))389393 return NULL;390390- return nilfs_next_entry(de);394394+395395+ limit = nilfs_last_byte(dir, 0); /* is a multiple of chunk size */396396+ if (unlikely(!limit || le64_to_cpu(de->inode) != dir->i_ino ||397397+ !nilfs_match(1, ".", de))) {398398+ msg = "missing '.'";399399+ goto fail;400400+ }401401+402402+ next_de = nilfs_next_entry(de);403403+ /*404404+ * If "next_de" has not reached the end of the chunk, there is405405+ * at least one more record. Check whether it matches "..".406406+ */407407+ if (unlikely((char *)next_de == (char *)de + nilfs_chunk_size(dir) ||408408+ !nilfs_match(2, "..", next_de))) {409409+ msg = "missing '..'";410410+ goto fail;411411+ }412412+ *foliop = folio;413413+ return next_de;414414+415415+fail:416416+ nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg);417417+ folio_release_kmap(folio, de);418418+ return NULL;391419}392420393421ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
+6-1
fs/userfaultfd.c
···20572057 goto out;20582058 features = uffdio_api.features;20592059 ret = -EINVAL;20602060- if (uffdio_api.api != UFFD_API || (features & ~UFFD_API_FEATURES))20602060+ if (uffdio_api.api != UFFD_API)20612061 goto err_out;20622062 ret = -EPERM;20632063 if ((features & UFFD_FEATURE_EVENT_FORK) && !capable(CAP_SYS_PTRACE))···20812081 uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED;20822082 uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC;20832083#endif20842084+20852085+ ret = -EINVAL;20862086+ if (features & ~uffdio_api.features)20872087+ goto err_out;20882088+20842089 uffdio_api.ioctls = UFFD_API_IOCTLS;20852090 ret = -EFAULT;20862091 if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
+2-1
include/linux/mmzone.h
···19811981static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)19821982{19831983 int idx = subsection_map_index(pfn);19841984+ struct mem_section_usage *usage = READ_ONCE(ms->usage);1984198519851985- return test_bit(idx, READ_ONCE(ms->usage)->subsection_map);19861986+ return usage ? test_bit(idx, usage->subsection_map) : 0;19861987}19871988#else19881989static inline int pfn_section_valid(struct mem_section *ms, unsigned long pfn)
+9-48
include/linux/page_ref.h
···230230231231static inline bool page_ref_add_unless(struct page *page, int nr, int u)232232{233233- bool ret = atomic_add_unless(&page->_refcount, nr, u);233233+ bool ret = false;234234+235235+ rcu_read_lock();236236+ /* avoid writing to the vmemmap area being remapped */237237+ if (!page_is_fake_head(page) && page_ref_count(page) != u)238238+ ret = atomic_add_unless(&page->_refcount, nr, u);239239+ rcu_read_unlock();234240235241 if (page_ref_tracepoint_active(page_ref_mod_unless))236242 __page_ref_mod_unless(page, nr, ret);···264258 return folio_ref_add_unless(folio, 1, 0);265259}266260267267-static inline bool folio_ref_try_add_rcu(struct folio *folio, int count)261261+static inline bool folio_ref_try_add(struct folio *folio, int count)268262{269269-#ifdef CONFIG_TINY_RCU270270- /*271271- * The caller guarantees the folio will not be freed from interrupt272272- * context, so (on !SMP) we only need preemption to be disabled273273- * and TINY_RCU does that for us.274274- */275275-# ifdef CONFIG_PREEMPT_COUNT276276- VM_BUG_ON(!in_atomic() && !irqs_disabled());277277-# endif278278- VM_BUG_ON_FOLIO(folio_ref_count(folio) == 0, folio);279279- folio_ref_add(folio, count);280280-#else281281- if (unlikely(!folio_ref_add_unless(folio, count, 0))) {282282- /* Either the folio has been freed, or will be freed. */283283- return false;284284- }285285-#endif286286- return true;287287-}288288-289289-/**290290- * folio_try_get_rcu - Attempt to increase the refcount on a folio.291291- * @folio: The folio.292292- *293293- * This is a version of folio_try_get() optimised for non-SMP kernels.294294- * If you are still holding the rcu_read_lock() after looking up the295295- * page and know that the page cannot have its refcount decreased to296296- * zero in interrupt context, you can use this instead of folio_try_get().297297- *298298- * Example users include get_user_pages_fast() (as pages are not unmapped299299- * from interrupt context) and the page cache lookups (as pages are not300300- * truncated from interrupt context). We also know that pages are not301301- * frozen in interrupt context for the purposes of splitting or migration.302302- *303303- * You can also use this function if you're holding a lock that prevents304304- * pages being frozen & removed; eg the i_pages lock for the page cache305305- * or the mmap_lock or page table lock for page tables. In this case,306306- * it will always succeed, and you could have used a plain folio_get(),307307- * but it's sometimes more convenient to have a common function called308308- * from both locked and RCU-protected contexts.309309- *310310- * Return: True if the reference count was successfully incremented.311311- */312312-static inline bool folio_try_get_rcu(struct folio *folio)313313-{314314- return folio_ref_try_add_rcu(folio, 1);263263+ return folio_ref_add_unless(folio, count, 0);315264}316265317266static inline int page_ref_freeze(struct page *page, int count)
+9-2
include/linux/pagemap.h
···354354 * a good order (that's 1MB if you're using 4kB pages)355355 */356356#ifdef CONFIG_TRANSPARENT_HUGEPAGE357357-#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER357357+#define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER358358#else359359-#define MAX_PAGECACHE_ORDER 8359359+#define PREFERRED_MAX_PAGECACHE_ORDER 8360360#endif361361+362362+/*363363+ * xas_split_alloc() does not support arbitrary orders. This implies no364364+ * 512MB THP on ARM64 with 64KB base page size.365365+ */366366+#define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1)367367+#define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER)361368362369/**363370 * mapping_set_large_folios() - Indicate the file supports large folios.
···3838#3939open C_FILE, ">$ARGV[1]" or die;4040print C_FILE "/*\n";4141-print C_FILE " * Automatically generated by ", $0 =~ s#^\Q$abs_srctree/\E##r, ". Do not edit\n";4141+my $scriptname = $0;4242+$scriptname =~ s#^\Q$abs_srctree/\E##;4343+print C_FILE " * Automatically generated by ", $scriptname, ". Do not edit\n";4244print C_FILE " */\n";43454446#
+19-2
mm/damon/core.c
···16941694 * access frequencies are similar. This is for minimizing the monitoring16951695 * overhead under the dynamically changeable access pattern. If a merge was16961696 * unnecessarily made, later 'kdamond_split_regions()' will revert it.16971697+ *16981698+ * The total number of regions could be higher than the user-defined limit,16991699+ * max_nr_regions for some cases. For example, the user can update17001700+ * max_nr_regions to a number that lower than the current number of regions17011701+ * while DAMON is running. For such a case, repeat merging until the limit is17021702+ * met while increasing @threshold up to possible maximum level.16971703 */16981704static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold,16991705 unsigned long sz_limit)17001706{17011707 struct damon_target *t;17081708+ unsigned int nr_regions;17091709+ unsigned int max_thres;1702171017031703- damon_for_each_target(t, c)17041704- damon_merge_regions_of(t, threshold, sz_limit);17111711+ max_thres = c->attrs.aggr_interval /17121712+ (c->attrs.sample_interval ? c->attrs.sample_interval : 1);17131713+ do {17141714+ nr_regions = 0;17151715+ damon_for_each_target(t, c) {17161716+ damon_merge_regions_of(t, threshold, sz_limit);17171717+ nr_regions += damon_nr_regions(t);17181718+ }17191719+ threshold = max(1, threshold * 2);17201720+ } while (nr_regions > c->attrs.max_nr_regions &&17211721+ threshold / 2 < max_thres);17051722}1706172317071724/*
+10-7
mm/filemap.c
···18471847 if (!folio || xa_is_value(folio))18481848 goto out;1849184918501850- if (!folio_try_get_rcu(folio))18501850+ if (!folio_try_get(folio))18511851 goto repeat;1852185218531853 if (unlikely(folio != xas_reload(&xas))) {···20012001 if (!folio || xa_is_value(folio))20022002 return folio;2003200320042004- if (!folio_try_get_rcu(folio))20042004+ if (!folio_try_get(folio))20052005 goto reset;2006200620072007 if (unlikely(folio != xas_reload(xas))) {···21812181 if (xa_is_value(folio))21822182 goto update_start;2183218321842184- if (!folio_try_get_rcu(folio))21842184+ if (!folio_try_get(folio))21852185 goto retry;2186218621872187 if (unlikely(folio != xas_reload(&xas)))···23132313 break;23142314 if (xa_is_sibling(folio))23152315 break;23162316- if (!folio_try_get_rcu(folio))23162316+ if (!folio_try_get(folio))23172317 goto retry;2318231823192319 if (unlikely(folio != xas_reload(&xas)))···3124312431253125#ifdef CONFIG_TRANSPARENT_HUGEPAGE31263126 /* Use the readahead code, even if readahead is disabled */31273127- if (vm_flags & VM_HUGEPAGE) {31273127+ if ((vm_flags & VM_HUGEPAGE) && HPAGE_PMD_ORDER <= MAX_PAGECACHE_ORDER) {31283128 fpin = maybe_unlock_mmap_for_io(vmf, fpin);31293129 ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);31303130 ra->size = HPAGE_PMD_NR;···34723472 continue;34733473 if (folio_test_locked(folio))34743474 continue;34753475- if (!folio_try_get_rcu(folio))34753475+ if (!folio_try_get(folio))34763476 continue;34773477 /* Has the page moved or been split? */34783478 if (unlikely(folio != xas_reload(xas)))···42484248 XA_STATE(xas, &mapping->i_pages, first_index);42494249 struct folio *folio;4250425042514251+ /* Flush stats (and potentially sleep) outside the RCU read section. */42524252+ mem_cgroup_flush_stats_ratelimited(NULL);42534253+42514254 rcu_read_lock();42524255 xas_for_each(&xas, folio, last_index) {42534256 int order;···43144311 goto resched;43154312 }43164313#endif43174317- if (workingset_test_recent(shadow, true, &workingset))43144314+ if (workingset_test_recent(shadow, true, &workingset, false))43184315 cs->nr_recently_evicted += nr_pages;4319431643204317 goto resched;
+154-137
mm/gup.c
···7676 folio = page_folio(page);7777 if (WARN_ON_ONCE(folio_ref_count(folio) < 0))7878 return NULL;7979- if (unlikely(!folio_ref_try_add_rcu(folio, refs)))7979+ if (unlikely(!folio_ref_try_add(folio, refs)))8080 return NULL;81818282 /*···9393 folio_put_refs(folio, refs);9494 goto retry;9595 }9696-9797- return folio;9898-}9999-100100-/**101101- * try_grab_folio() - Attempt to get or pin a folio.102102- * @page: pointer to page to be grabbed103103- * @refs: the value to (effectively) add to the folio's refcount104104- * @flags: gup flags: these are the FOLL_* flag values.105105- *106106- * "grab" names in this file mean, "look at flags to decide whether to use107107- * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.108108- *109109- * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the110110- * same time. (That's true throughout the get_user_pages*() and111111- * pin_user_pages*() APIs.) Cases:112112- *113113- * FOLL_GET: folio's refcount will be incremented by @refs.114114- *115115- * FOLL_PIN on large folios: folio's refcount will be incremented by116116- * @refs, and its pincount will be incremented by @refs.117117- *118118- * FOLL_PIN on single-page folios: folio's refcount will be incremented by119119- * @refs * GUP_PIN_COUNTING_BIAS.120120- *121121- * Return: The folio containing @page (with refcount appropriately122122- * incremented) for success, or NULL upon failure. If neither FOLL_GET123123- * nor FOLL_PIN was set, that's considered failure, and furthermore,124124- * a likely bug in the caller, so a warning is also emitted.125125- */126126-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)127127-{128128- struct folio *folio;129129-130130- if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0))131131- return NULL;132132-133133- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))134134- return NULL;135135-136136- if (flags & FOLL_GET)137137- return try_get_folio(page, refs);138138-139139- /* FOLL_PIN is set */140140-141141- /*142142- * Don't take a pin on the zero page - it's not going anywhere143143- * and it is used in a *lot* of places.144144- */145145- if (is_zero_page(page))146146- return page_folio(page);147147-148148- folio = try_get_folio(page, refs);149149- if (!folio)150150- return NULL;151151-152152- /*153153- * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a154154- * right zone, so fail and let the caller fall back to the slow155155- * path.156156- */157157- if (unlikely((flags & FOLL_LONGTERM) &&158158- !folio_is_longterm_pinnable(folio))) {159159- if (!put_devmap_managed_folio_refs(folio, refs))160160- folio_put_refs(folio, refs);161161- return NULL;162162- }163163-164164- /*165165- * When pinning a large folio, use an exact count to track it.166166- *167167- * However, be sure to *also* increment the normal folio168168- * refcount field at least once, so that the folio really169169- * is pinned. That's why the refcount from the earlier170170- * try_get_folio() is left intact.171171- */172172- if (folio_test_large(folio))173173- atomic_add(refs, &folio->_pincount);174174- else175175- folio_ref_add(folio,176176- refs * (GUP_PIN_COUNTING_BIAS - 1));177177- /*178178- * Adjust the pincount before re-checking the PTE for changes.179179- * This is essentially a smp_mb() and is paired with a memory180180- * barrier in folio_try_share_anon_rmap_*().181181- */182182- smp_mb__after_atomic();183183-184184- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);1859618697 return folio;18798}···114203}115204116205/**117117- * try_grab_page() - elevate a page's refcount by a flag-dependent amount118118- * @page: pointer to page to be grabbed119119- * @flags: gup flags: these are the FOLL_* flag values.206206+ * try_grab_folio() - add a folio's refcount by a flag-dependent amount207207+ * @folio: pointer to folio to be grabbed208208+ * @refs: the value to (effectively) add to the folio's refcount209209+ * @flags: gup flags: these are the FOLL_* flag values120210 *121211 * This might not do anything at all, depending on the flags argument.122212 *123213 * "grab" names in this file mean, "look at flags to decide whether to use124124- * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount.214214+ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.125215 *126216 * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same127127- * time. Cases: please see the try_grab_folio() documentation, with128128- * "refs=1".217217+ * time.129218 *130219 * Return: 0 for success, or if no action was required (if neither FOLL_PIN131220 * nor FOLL_GET was set, nothing is done). A negative error code for failure:132221 *133133- * -ENOMEM FOLL_GET or FOLL_PIN was set, but the page could not222222+ * -ENOMEM FOLL_GET or FOLL_PIN was set, but the folio could not134223 * be grabbed.224224+ *225225+ * It is called when we have a stable reference for the folio, typically in226226+ * GUP slow path.135227 */136136-int __must_check try_grab_page(struct page *page, unsigned int flags)228228+int __must_check try_grab_folio(struct folio *folio, int refs,229229+ unsigned int flags)137230{138138- struct folio *folio = page_folio(page);139139-140231 if (WARN_ON_ONCE(folio_ref_count(folio) <= 0))141232 return -ENOMEM;142233143143- if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))234234+ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(&folio->page)))144235 return -EREMOTEIO;145236146237 if (flags & FOLL_GET)147147- folio_ref_inc(folio);238238+ folio_ref_add(folio, refs);148239 else if (flags & FOLL_PIN) {149240 /*150241 * Don't take a pin on the zero page - it's not going anywhere151242 * and it is used in a *lot* of places.152243 */153153- if (is_zero_page(page))244244+ if (is_zero_folio(folio))154245 return 0;155246156247 /*157157- * Similar to try_grab_folio(): be sure to *also*158158- * increment the normal page refcount field at least once,248248+ * Increment the normal page refcount field at least once,159249 * so that the page really is pinned.160250 */161251 if (folio_test_large(folio)) {162162- folio_ref_add(folio, 1);163163- atomic_add(1, &folio->_pincount);252252+ folio_ref_add(folio, refs);253253+ atomic_add(refs, &folio->_pincount);164254 } else {165165- folio_ref_add(folio, GUP_PIN_COUNTING_BIAS);255255+ folio_ref_add(folio, refs * GUP_PIN_COUNTING_BIAS);166256 }167257168168- node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, 1);258258+ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);169259 }170260171261 return 0;···427515428516 return nr;429517}518518+519519+/**520520+ * try_grab_folio_fast() - Attempt to get or pin a folio in fast path.521521+ * @page: pointer to page to be grabbed522522+ * @refs: the value to (effectively) add to the folio's refcount523523+ * @flags: gup flags: these are the FOLL_* flag values.524524+ *525525+ * "grab" names in this file mean, "look at flags to decide whether to use526526+ * FOLL_PIN or FOLL_GET behavior, when incrementing the folio's refcount.527527+ *528528+ * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the529529+ * same time. (That's true throughout the get_user_pages*() and530530+ * pin_user_pages*() APIs.) Cases:531531+ *532532+ * FOLL_GET: folio's refcount will be incremented by @refs.533533+ *534534+ * FOLL_PIN on large folios: folio's refcount will be incremented by535535+ * @refs, and its pincount will be incremented by @refs.536536+ *537537+ * FOLL_PIN on single-page folios: folio's refcount will be incremented by538538+ * @refs * GUP_PIN_COUNTING_BIAS.539539+ *540540+ * Return: The folio containing @page (with refcount appropriately541541+ * incremented) for success, or NULL upon failure. If neither FOLL_GET542542+ * nor FOLL_PIN was set, that's considered failure, and furthermore,543543+ * a likely bug in the caller, so a warning is also emitted.544544+ *545545+ * It uses add ref unless zero to elevate the folio refcount and must be called546546+ * in fast path only.547547+ */548548+static struct folio *try_grab_folio_fast(struct page *page, int refs,549549+ unsigned int flags)550550+{551551+ struct folio *folio;552552+553553+ /* Raise warn if it is not called in fast GUP */554554+ VM_WARN_ON_ONCE(!irqs_disabled());555555+556556+ if (WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == 0))557557+ return NULL;558558+559559+ if (unlikely(!(flags & FOLL_PCI_P2PDMA) && is_pci_p2pdma_page(page)))560560+ return NULL;561561+562562+ if (flags & FOLL_GET)563563+ return try_get_folio(page, refs);564564+565565+ /* FOLL_PIN is set */566566+567567+ /*568568+ * Don't take a pin on the zero page - it's not going anywhere569569+ * and it is used in a *lot* of places.570570+ */571571+ if (is_zero_page(page))572572+ return page_folio(page);573573+574574+ folio = try_get_folio(page, refs);575575+ if (!folio)576576+ return NULL;577577+578578+ /*579579+ * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a580580+ * right zone, so fail and let the caller fall back to the slow581581+ * path.582582+ */583583+ if (unlikely((flags & FOLL_LONGTERM) &&584584+ !folio_is_longterm_pinnable(folio))) {585585+ if (!put_devmap_managed_folio_refs(folio, refs))586586+ folio_put_refs(folio, refs);587587+ return NULL;588588+ }589589+590590+ /*591591+ * When pinning a large folio, use an exact count to track it.592592+ *593593+ * However, be sure to *also* increment the normal folio594594+ * refcount field at least once, so that the folio really595595+ * is pinned. That's why the refcount from the earlier596596+ * try_get_folio() is left intact.597597+ */598598+ if (folio_test_large(folio))599599+ atomic_add(refs, &folio->_pincount);600600+ else601601+ folio_ref_add(folio,602602+ refs * (GUP_PIN_COUNTING_BIAS - 1));603603+ /*604604+ * Adjust the pincount before re-checking the PTE for changes.605605+ * This is essentially a smp_mb() and is paired with a memory606606+ * barrier in folio_try_share_anon_rmap_*().607607+ */608608+ smp_mb__after_atomic();609609+610610+ node_stat_mod_folio(folio, NR_FOLL_PIN_ACQUIRED, refs);611611+612612+ return folio;613613+}430614#endif /* CONFIG_ARCH_HAS_HUGEPD || CONFIG_HAVE_GUP_FAST */431615432616#ifdef CONFIG_ARCH_HAS_HUGEPD···543535 */544536static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz,545537 unsigned long addr, unsigned long end, unsigned int flags,546546- struct page **pages, int *nr)538538+ struct page **pages, int *nr, bool fast)547539{548540 unsigned long pte_end;549541 struct page *page;···566558 page = pte_page(pte);567559 refs = record_subpages(page, sz, addr, end, pages + *nr);568560569569- folio = try_grab_folio(page, refs, flags);570570- if (!folio)571571- return 0;561561+ if (fast) {562562+ folio = try_grab_folio_fast(page, refs, flags);563563+ if (!folio)564564+ return 0;565565+ } else {566566+ folio = page_folio(page);567567+ if (try_grab_folio(folio, refs, flags))568568+ return 0;569569+ }572570573571 if (unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) {574572 gup_put_folio(folio, refs, flags);···602588static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,603589 unsigned long addr, unsigned int pdshift,604590 unsigned long end, unsigned int flags,605605- struct page **pages, int *nr)591591+ struct page **pages, int *nr, bool fast)606592{607593 pte_t *ptep;608594 unsigned long sz = 1UL << hugepd_shift(hugepd);···612598 ptep = hugepte_offset(hugepd, addr, pdshift);613599 do {614600 next = hugepte_addr_end(addr, end, sz);615615- ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr);601601+ ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr,602602+ fast);616603 if (ret != 1)617604 return ret;618605 } while (ptep++, addr = next, addr != end);···640625 ptep = hugepte_offset(hugepd, addr, pdshift);641626 ptl = huge_pte_lock(h, vma->vm_mm, ptep);642627 ret = gup_hugepd(vma, hugepd, addr, pdshift, addr + PAGE_SIZE,643643- flags, &page, &nr);628628+ flags, &page, &nr, false);644629 spin_unlock(ptl);645630646631 if (ret == 1) {···657642static inline int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,658643 unsigned long addr, unsigned int pdshift,659644 unsigned long end, unsigned int flags,660660- struct page **pages, int *nr)645645+ struct page **pages, int *nr, bool fast)661646{662647 return 0;663648}···744729 gup_must_unshare(vma, flags, page))745730 return ERR_PTR(-EMLINK);746731747747- ret = try_grab_page(page, flags);732732+ ret = try_grab_folio(page_folio(page), 1, flags);748733 if (ret)749734 page = ERR_PTR(ret);750735 else···821806 VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&822807 !PageAnonExclusive(page), page);823808824824- ret = try_grab_page(page, flags);809809+ ret = try_grab_folio(page_folio(page), 1, flags);825810 if (ret)826811 return ERR_PTR(ret);827812···983968 VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&984969 !PageAnonExclusive(page), page);985970986986- /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */987987- ret = try_grab_page(page, flags);971971+ /* try_grab_folio() does nothing unless FOLL_GET or FOLL_PIN is set. */972972+ ret = try_grab_folio(page_folio(page), 1, flags);988973 if (unlikely(ret)) {989974 page = ERR_PTR(ret);990975 goto out;···12481233 goto unmap;12491234 *page = pte_page(entry);12501235 }12511251- ret = try_grab_page(*page, gup_flags);12361236+ ret = try_grab_folio(page_folio(*page), 1, gup_flags);12521237 if (unlikely(ret))12531238 goto unmap;12541239out:···16511636 * pages.16521637 */16531638 if (page_increm > 1) {16541654- struct folio *folio;16391639+ struct folio *folio = page_folio(page);1655164016561641 /*16571642 * Since we already hold refcount on the16581643 * large folio, this should never fail.16591644 */16601660- folio = try_grab_folio(page, page_increm - 1,16611661- foll_flags);16621662- if (WARN_ON_ONCE(!folio)) {16451645+ if (try_grab_folio(folio, page_increm - 1,16461646+ foll_flags)) {16631647 /*16641648 * Release the 1st page ref if the16651649 * folio is problematic, fail hard.16661650 */16671667- gup_put_folio(page_folio(page), 1,16511651+ gup_put_folio(folio, 1,16681652 foll_flags);16691653 ret = -EFAULT;16701654 goto out;···28112797 * This code is based heavily on the PowerPC implementation by Nick Piggin.28122798 */28132799#ifdef CONFIG_HAVE_GUP_FAST28142814-28152800/*28162801 * Used in the GUP-fast path to determine whether GUP is permitted to work on28172802 * a specific folio.···29752962 VM_BUG_ON(!pfn_valid(pte_pfn(pte)));29762963 page = pte_page(pte);2977296429782978- folio = try_grab_folio(page, 1, flags);29652965+ folio = try_grab_folio_fast(page, 1, flags);29792966 if (!folio)29802967 goto pte_unmap;29812968···30623049 break;30633050 }3064305130653065- folio = try_grab_folio(page, 1, flags);30523052+ folio = try_grab_folio_fast(page, 1, flags);30663053 if (!folio) {30673054 gup_fast_undo_dev_pagemap(nr, nr_start, flags, pages);30683055 break;···31513138 page = pmd_page(orig);31523139 refs = record_subpages(page, PMD_SIZE, addr, end, pages + *nr);3153314031543154- folio = try_grab_folio(page, refs, flags);31413141+ folio = try_grab_folio_fast(page, refs, flags);31553142 if (!folio)31563143 return 0;31573144···31953182 page = pud_page(orig);31963183 refs = record_subpages(page, PUD_SIZE, addr, end, pages + *nr);3197318431983198- folio = try_grab_folio(page, refs, flags);31853185+ folio = try_grab_folio_fast(page, refs, flags);31993186 if (!folio)32003187 return 0;32013188···32353222 page = pgd_page(orig);32363223 refs = record_subpages(page, PGDIR_SIZE, addr, end, pages + *nr);3237322432383238- folio = try_grab_folio(page, refs, flags);32253225+ folio = try_grab_folio_fast(page, refs, flags);32393226 if (!folio)32403227 return 0;32413228···32893276 * pmd format and THP pmd format32903277 */32913278 if (gup_hugepd(NULL, __hugepd(pmd_val(pmd)), addr,32923292- PMD_SHIFT, next, flags, pages, nr) != 1)32793279+ PMD_SHIFT, next, flags, pages, nr,32803280+ true) != 1)32933281 return 0;32943282 } else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags,32953283 pages, nr))···33203306 return 0;33213307 } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {33223308 if (gup_hugepd(NULL, __hugepd(pud_val(pud)), addr,33233323- PUD_SHIFT, next, flags, pages, nr) != 1)33093309+ PUD_SHIFT, next, flags, pages, nr,33103310+ true) != 1)33243311 return 0;33253312 } else if (!gup_fast_pmd_range(pudp, pud, addr, next, flags,33263313 pages, nr))···33483333 BUILD_BUG_ON(p4d_leaf(p4d));33493334 if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {33503335 if (gup_hugepd(NULL, __hugepd(p4d_val(p4d)), addr,33513351- P4D_SHIFT, next, flags, pages, nr) != 1)33363336+ P4D_SHIFT, next, flags, pages, nr,33373337+ true) != 1)33523338 return 0;33533339 } else if (!gup_fast_pud_range(p4dp, p4d, addr, next, flags,33543340 pages, nr))···33783362 return;33793363 } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {33803364 if (gup_hugepd(NULL, __hugepd(pgd_val(pgd)), addr,33813381- PGDIR_SHIFT, next, flags, pages, nr) != 1)33653365+ PGDIR_SHIFT, next, flags, pages, nr,33663366+ true) != 1)33823367 return;33833368 } else if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags,33843369 pages, nr))
+1-1
mm/huge_memory.c
···13331333 if (!*pgmap)13341334 return ERR_PTR(-EFAULT);13351335 page = pfn_to_page(pfn);13361336- ret = try_grab_page(page, flags);13361336+ ret = try_grab_folio(page_folio(page), 1, flags);13371337 if (ret)13381338 page = ERR_PTR(ret);13391339
+7-46
mm/hugetlb.c
···16291629 * folio appears as just a compound page. Otherwise, wait until after16301630 * allocating vmemmap to clear the flag.16311631 *16321632- * A reference is held on the folio, except in the case of demote.16331633- *16341632 * Must be called with hugetlb lock held.16351633 */16361636-static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,16371637- bool adjust_surplus,16381638- bool demote)16341634+static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,16351635+ bool adjust_surplus)16391636{16401637 int nid = folio_nid(folio);16411638···16461649 list_del(&folio->lru);1647165016481651 if (folio_test_hugetlb_freed(folio)) {16521652+ folio_clear_hugetlb_freed(folio);16491653 h->free_huge_pages--;16501654 h->free_huge_pages_node[nid]--;16511655 }···16631665 if (!folio_test_hugetlb_vmemmap_optimized(folio))16641666 __folio_clear_hugetlb(folio);1665166716661666- /*16671667- * In the case of demote we do not ref count the page as it will soon16681668- * be turned into a page of smaller size.16691669- */16701670- if (!demote)16711671- folio_ref_unfreeze(folio, 1);16721672-16731668 h->nr_huge_pages--;16741669 h->nr_huge_pages_node[nid]--;16751675-}16761676-16771677-static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,16781678- bool adjust_surplus)16791679-{16801680- __remove_hugetlb_folio(h, folio, adjust_surplus, false);16811681-}16821682-16831683-static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio,16841684- bool adjust_surplus)16851685-{16861686- __remove_hugetlb_folio(h, folio, adjust_surplus, true);16871670}1688167116891672static void add_hugetlb_folio(struct hstate *h, struct folio *folio,16901673 bool adjust_surplus)16911674{16921692- int zeroed;16931675 int nid = folio_nid(folio);1694167616951677 VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio);···16921714 * folio_change_private(folio, NULL) cleared it.16931715 */16941716 folio_set_hugetlb_vmemmap_optimized(folio);16951695-16961696- /*16971697- * This folio is about to be managed by the hugetlb allocator and16981698- * should have no users. Drop our reference, and check for others16991699- * just in case.17001700- */17011701- zeroed = folio_put_testzero(folio);17021702- if (unlikely(!zeroed))17031703- /*17041704- * It is VERY unlikely soneone else has taken a ref17051705- * on the folio. In this case, we simply return as17061706- * free_huge_folio() will be called when this other ref17071707- * is dropped.17081708- */17091709- return;1710171717111718 arch_clear_hugetlb_flags(folio);17121719 enqueue_hugetlb_folio(h, folio);···17451782 __folio_clear_hugetlb(folio);17461783 spin_unlock_irq(&hugetlb_lock);17471784 }17851785+17861786+ folio_ref_unfreeze(folio, 1);1748178717491788 /*17501789 * Non-gigantic pages demoted from CMA allocated gigantic pages···3071310630723107free_new:30733108 spin_unlock_irq(&hugetlb_lock);30743074- if (new_folio) {30753075- /* Folio has a zero ref count, but needs a ref to be freed */30763076- folio_ref_unfreeze(new_folio, 1);31093109+ if (new_folio)30773110 update_and_free_hugetlb_folio(h, new_folio, false);30783078- }3079311130803112 return ret;30813113}···3927396539283966 target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);3929396739303930- remove_hugetlb_folio_for_demote(h, folio, false);39683968+ remove_hugetlb_folio(h, folio, false);39313969 spin_unlock_irq(&hugetlb_lock);3932397039333971 /*···39413979 if (rc) {39423980 /* Allocation of vmemmmap failed, we can not demote folio */39433981 spin_lock_irq(&hugetlb_lock);39443944- folio_ref_unfreeze(folio, 1);39453982 add_hugetlb_folio(h, folio, false);39463983 return rc;39473984 }
+16
mm/hugetlb_vmemmap.c
···455455 unsigned long vmemmap_reuse;456456457457 VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);458458+ VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio);459459+458460 if (!folio_test_hugetlb_vmemmap_optimized(folio))459461 return 0;460462···492490 */493491int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio)494492{493493+ /* avoid writes from page_ref_add_unless() while unfolding vmemmap */494494+ synchronize_rcu();495495+495496 return __hugetlb_vmemmap_restore_folio(h, folio, 0);496497}497498···518513 struct folio *folio, *t_folio;519514 long restored = 0;520515 long ret = 0;516516+517517+ /* avoid writes from page_ref_add_unless() while unfolding vmemmap */518518+ synchronize_rcu();521519522520 list_for_each_entry_safe(folio, t_folio, folio_list, lru) {523521 if (folio_test_hugetlb_vmemmap_optimized(folio)) {···567559 unsigned long vmemmap_reuse;568560569561 VM_WARN_ON_ONCE_FOLIO(!folio_test_hugetlb(folio), folio);562562+ VM_WARN_ON_ONCE_FOLIO(folio_ref_count(folio), folio);563563+570564 if (!vmemmap_should_optimize_folio(h, folio))571565 return ret;572566···620610{621611 LIST_HEAD(vmemmap_pages);622612613613+ /* avoid writes from page_ref_add_unless() while folding vmemmap */614614+ synchronize_rcu();615615+623616 __hugetlb_vmemmap_optimize_folio(h, folio, &vmemmap_pages, 0);624617 free_vmemmap_page_list(&vmemmap_pages);625618}···665652 }666653667654 flush_tlb_all();655655+656656+ /* avoid writes from page_ref_add_unless() while folding vmemmap */657657+ synchronize_rcu();668658669659 list_for_each_entry(folio, folio_list, lru) {670660 int ret;
+2-2
mm/internal.h
···12261226/*12271227 * mm/gup.c12281228 */12291229-struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);12301230-int __must_check try_grab_page(struct page *page, unsigned int flags);12291229+int __must_check try_grab_folio(struct folio *folio, int refs,12301230+ unsigned int flags);1231123112321232/*12331233 * mm/huge_memory.c
-11
mm/memcontrol.c
···4948494849494949 /* Transfer the charge and the css ref */49504950 commit_charge(new, memcg);49514951- /*49524952- * If the old folio is a large folio and is in the split queue, it needs49534953- * to be removed from the split queue now, in case getting an incorrect49544954- * split queue in destroy_large_folio() after the memcg of the old folio49554955- * is cleared.49564956- *49574957- * In addition, the old folio is about to be freed after migration, so49584958- * removing from the split queue a bit earlier seems reasonable.49594959- */49604960- if (folio_test_large(old) && folio_test_large_rmappable(old))49614961- folio_undo_large_rmappable(old);49624951 old->memcg_data = 0;49634952}49644953
+13
mm/migrate.c
···415415 if (folio_ref_count(folio) != expected_count)416416 return -EAGAIN;417417418418+ /* Take off deferred split queue while frozen and memcg set */419419+ if (folio_test_large(folio) &&420420+ folio_test_large_rmappable(folio)) {421421+ if (!folio_ref_freeze(folio, expected_count))422422+ return -EAGAIN;423423+ folio_undo_large_rmappable(folio);424424+ folio_ref_unfreeze(folio, expected_count);425425+ }426426+418427 /* No turning back from here */419428 newfolio->index = folio->index;420429 newfolio->mapping = folio->mapping;···441432 xas_unlock_irq(&xas);442433 return -EAGAIN;443434 }435435+436436+ /* Take off deferred split queue while frozen and memcg set */437437+ if (folio_test_large(folio) && folio_test_large_rmappable(folio))438438+ folio_undo_large_rmappable(folio);444439445440 /*446441 * Now we know that no one else is looking at the folio:
+4-4
mm/readahead.c
···449449450450 limit = min(limit, index + ra->size - 1);451451452452- if (new_order < MAX_PAGECACHE_ORDER) {452452+ if (new_order < MAX_PAGECACHE_ORDER)453453 new_order += 2;454454- new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);455455- new_order = min_t(unsigned int, new_order, ilog2(ra->size));456456- }454454+455455+ new_order = min_t(unsigned int, MAX_PAGECACHE_ORDER, new_order);456456+ new_order = min_t(unsigned int, new_order, ilog2(ra->size));457457458458 /* See comment in page_cache_ra_unbounded() */459459 nofs = memalloc_nofs_save();
···25442544static struct xarray *25452545addr_to_vb_xa(unsigned long addr)25462546{25472547- int index = (addr / VMAP_BLOCK_SIZE) % num_possible_cpus();25472547+ int index = (addr / VMAP_BLOCK_SIZE) % nr_cpu_ids;25482548+25492549+ /*25502550+ * Please note, nr_cpu_ids points on a highest set25512551+ * possible bit, i.e. we never invoke cpumask_next()25522552+ * if an index points on it which is nr_cpu_ids - 1.25532553+ */25542554+ if (!cpu_possible(index))25552555+ index = cpumask_next(index, cpu_possible_mask);2548255625492557 return &per_cpu(vmap_block_queue, index).vmap_blocks;25502558}
+11-3
mm/workingset.c
···412412 * @file: whether the corresponding folio is from the file lru.413413 * @workingset: where the workingset value unpacked from shadow should414414 * be stored.415415+ * @flush: whether to flush cgroup rstat.415416 *416417 * Return: true if the shadow is for a recently evicted folio; false otherwise.417418 */418418-bool workingset_test_recent(void *shadow, bool file, bool *workingset)419419+bool workingset_test_recent(void *shadow, bool file, bool *workingset,420420+ bool flush)419421{420422 struct mem_cgroup *eviction_memcg;421423 struct lruvec *eviction_lruvec;···469467470468 /*471469 * Flush stats (and potentially sleep) outside the RCU read section.470470+ *471471+ * Note that workingset_test_recent() itself might be called in RCU read472472+ * section (for e.g, in cachestat) - these callers need to skip flushing473473+ * stats (via the flush argument).474474+ *472475 * XXX: With per-memcg flushing and thresholding, is ratelimiting473476 * still needed here?474477 */475475- mem_cgroup_flush_stats_ratelimited(eviction_memcg);478478+ if (flush)479479+ mem_cgroup_flush_stats_ratelimited(eviction_memcg);476480477481 eviction_lruvec = mem_cgroup_lruvec(eviction_memcg, pgdat);478482 refault = atomic_long_read(&eviction_lruvec->nonresident_age);···566558567559 mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + file, nr);568560569569- if (!workingset_test_recent(shadow, file, &workingset))561561+ if (!workingset_test_recent(shadow, file, &workingset, true))570562 return;571563572564 folio_set_active(folio);