include/linux/pagemap.h at v6.7 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / include / linux / pagemap.h
at v6.7 1534 lines 48 kB view raw
   1/* SPDX-License-Identifier: GPL-2.0 */
   2#ifndef _LINUX_PAGEMAP_H
   3#define _LINUX_PAGEMAP_H
   4
   5/*
   6 * Copyright 1995 Linus Torvalds
   7 */
   8#include <linux/mm.h>
   9#include <linux/fs.h>
  10#include <linux/list.h>
  11#include <linux/highmem.h>
  12#include <linux/compiler.h>
  13#include <linux/uaccess.h>
  14#include <linux/gfp.h>
  15#include <linux/bitops.h>
  16#include <linux/hardirq.h> /* for in_interrupt() */
  17#include <linux/hugetlb_inline.h>
  18
/* Forward declaration so prototypes in this header can take folio_batch pointers. */
struct folio_batch;

/* Takes a mapping and an inclusive-looking [start, end] page index pair. */
unsigned long invalidate_mapping_pages(struct address_space *mapping,
					pgoff_t start, pgoff_t end);
  23
  24static inline void invalidate_remote_inode(struct inode *inode)
  25{
  26	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
  27	    S_ISLNK(inode->i_mode))
  28		invalidate_mapping_pages(inode->i_mapping, 0, -1);
  29}
  30int invalidate_inode_pages2(struct address_space *mapping);
  31int invalidate_inode_pages2_range(struct address_space *mapping,
  32		pgoff_t start, pgoff_t end);
  33int kiocb_invalidate_pages(struct kiocb *iocb, size_t count);
  34void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count);
  35
  36int write_inode_now(struct inode *, int sync);
  37int filemap_fdatawrite(struct address_space *);
  38int filemap_flush(struct address_space *);
  39int filemap_fdatawait_keep_errors(struct address_space *mapping);
  40int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
  41int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
  42		loff_t start_byte, loff_t end_byte);
  43
  44static inline int filemap_fdatawait(struct address_space *mapping)
  45{
  46	return filemap_fdatawait_range(mapping, 0, LLONG_MAX);
  47}
  48
  49bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend);
  50int filemap_write_and_wait_range(struct address_space *mapping,
  51		loff_t lstart, loff_t lend);
  52int __filemap_fdatawrite_range(struct address_space *mapping,
  53		loff_t start, loff_t end, int sync_mode);
  54int filemap_fdatawrite_range(struct address_space *mapping,
  55		loff_t start, loff_t end);
  56int filemap_check_errors(struct address_space *mapping);
  57void __filemap_set_wb_err(struct address_space *mapping, int err);
  58int filemap_fdatawrite_wbc(struct address_space *mapping,
  59			   struct writeback_control *wbc);
  60int kiocb_write_and_wait(struct kiocb *iocb, size_t count);
  61
  62static inline int filemap_write_and_wait(struct address_space *mapping)
  63{
  64	return filemap_write_and_wait_range(mapping, 0, LLONG_MAX);
  65}
  66
  67/**
  68 * filemap_set_wb_err - set a writeback error on an address_space
  69 * @mapping: mapping in which to set writeback error
  70 * @err: error to be set in mapping
  71 *
  72 * When writeback fails in some way, we must record that error so that
  73 * userspace can be informed when fsync and the like are called.  We endeavor
  74 * to report errors on any file that was open at the time of the error.  Some
  75 * internal callers also need to know when writeback errors have occurred.
  76 *
  77 * When a writeback error occurs, most filesystems will want to call
  78 * filemap_set_wb_err to record the error in the mapping so that it will be
  79 * automatically reported whenever fsync is called on the file.
  80 */
  81static inline void filemap_set_wb_err(struct address_space *mapping, int err)
  82{
  83	/* Fastpath for common case of no error */
  84	if (unlikely(err))
  85		__filemap_set_wb_err(mapping, err);
  86}
  87
  88/**
  89 * filemap_check_wb_err - has an error occurred since the mark was sampled?
  90 * @mapping: mapping to check for writeback errors
  91 * @since: previously-sampled errseq_t
  92 *
  93 * Grab the errseq_t value from the mapping, and see if it has changed "since"
  94 * the given value was sampled.
  95 *
  96 * If it has then report the latest error set, otherwise return 0.
  97 */
  98static inline int filemap_check_wb_err(struct address_space *mapping,
  99					errseq_t since)
 100{
 101	return errseq_check(&mapping->wb_err, since);
 102}
 103
 104/**
 105 * filemap_sample_wb_err - sample the current errseq_t to test for later errors
 106 * @mapping: mapping to be sampled
 107 *
 108 * Writeback errors are always reported relative to a particular sample point
 109 * in the past. This function provides those sample points.
 110 */
 111static inline errseq_t filemap_sample_wb_err(struct address_space *mapping)
 112{
 113	return errseq_sample(&mapping->wb_err);
 114}
 115
 116/**
 117 * file_sample_sb_err - sample the current errseq_t to test for later errors
 118 * @file: file pointer to be sampled
 119 *
 120 * Grab the most current superblock-level errseq_t value for the given
 121 * struct file.
 122 */
 123static inline errseq_t file_sample_sb_err(struct file *file)
 124{
 125	return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
 126}
 127
 128/*
 129 * Flush file data before changing attributes.  Caller must hold any locks
 130 * required to prevent further writes to this file until we're done setting
 131 * flags.
 132 */
 133static inline int inode_drain_writes(struct inode *inode)
 134{
 135	inode_dio_wait(inode);
 136	return filemap_write_and_wait(inode->i_mapping);
 137}
 138
 139static inline bool mapping_empty(struct address_space *mapping)
 140{
 141	return xa_empty(&mapping->i_pages);
 142}
 143
 144/*
 145 * mapping_shrinkable - test if page cache state allows inode reclaim
 146 * @mapping: the page cache mapping
 147 *
 148 * This checks the mapping's cache state for the pupose of inode
 149 * reclaim and LRU management.
 150 *
 151 * The caller is expected to hold the i_lock, but is not required to
 152 * hold the i_pages lock, which usually protects cache state. That's
 153 * because the i_lock and the list_lru lock that protect the inode and
 154 * its LRU state don't nest inside the irq-safe i_pages lock.
 155 *
 156 * Cache deletions are performed under the i_lock, which ensures that
 157 * when an inode goes empty, it will reliably get queued on the LRU.
 158 *
 159 * Cache additions do not acquire the i_lock and may race with this
 160 * check, in which case we'll report the inode as shrinkable when it
 161 * has cache pages. This is okay: the shrinker also checks the
 162 * refcount and the referenced bit, which will be elevated or set in
 163 * the process of adding new cache pages to an inode.
 164 */
 165static inline bool mapping_shrinkable(struct address_space *mapping)
 166{
 167	void *head;
 168
 169	/*
 170	 * On highmem systems, there could be lowmem pressure from the
 171	 * inodes before there is highmem pressure from the page
 172	 * cache. Make inodes shrinkable regardless of cache state.
 173	 */
 174	if (IS_ENABLED(CONFIG_HIGHMEM))
 175		return true;
 176
 177	/* Cache completely empty? Shrink away. */
 178	head = rcu_access_pointer(mapping->i_pages.xa_head);
 179	if (!head)
 180		return true;
 181
 182	/*
 183	 * The xarray stores single offset-0 entries directly in the
 184	 * head pointer, which allows non-resident page cache entries
 185	 * to escape the shadow shrinker's list of xarray nodes. The
 186	 * inode shrinker needs to pick them up under memory pressure.
 187	 */
 188	if (!xa_is_node(head) && xa_is_value(head))
 189		return true;
 190
 191	return false;
 192}
 193
 194/*
 195 * Bits in mapping->flags.
 196 */
 197enum mapping_flags {
 198	AS_EIO		= 0,	/* IO error on async write */
 199	AS_ENOSPC	= 1,	/* ENOSPC on async write */
 200	AS_MM_ALL_LOCKS	= 2,	/* under mm_take_all_locks() */
 201	AS_UNEVICTABLE	= 3,	/* e.g., ramdisk, SHM_LOCK */
 202	AS_EXITING	= 4, 	/* final truncate in progress */
 203	/* writeback related tags are not used */
 204	AS_NO_WRITEBACK_TAGS = 5,
 205	AS_LARGE_FOLIO_SUPPORT = 6,
 206	AS_RELEASE_ALWAYS,	/* Call ->release_folio(), even if no private data */
 207	AS_STABLE_WRITES,	/* must wait for writeback before modifying
 208				   folio contents */
 209};
 210
 211/**
 212 * mapping_set_error - record a writeback error in the address_space
 213 * @mapping: the mapping in which an error should be set
 214 * @error: the error to set in the mapping
 215 *
 216 * When writeback fails in some way, we must record that error so that
 217 * userspace can be informed when fsync and the like are called.  We endeavor
 218 * to report errors on any file that was open at the time of the error.  Some
 219 * internal callers also need to know when writeback errors have occurred.
 220 *
 221 * When a writeback error occurs, most filesystems will want to call
 222 * mapping_set_error to record the error in the mapping so that it can be
 223 * reported when the application calls fsync(2).
 224 */
 225static inline void mapping_set_error(struct address_space *mapping, int error)
 226{
 227	if (likely(!error))
 228		return;
 229
 230	/* Record in wb_err for checkers using errseq_t based tracking */
 231	__filemap_set_wb_err(mapping, error);
 232
 233	/* Record it in superblock */
 234	if (mapping->host)
 235		errseq_set(&mapping->host->i_sb->s_wb_err, error);
 236
 237	/* Record it in flags for now, for legacy callers */
 238	if (error == -ENOSPC)
 239		set_bit(AS_ENOSPC, &mapping->flags);
 240	else
 241		set_bit(AS_EIO, &mapping->flags);
 242}
 243
 244static inline void mapping_set_unevictable(struct address_space *mapping)
 245{
 246	set_bit(AS_UNEVICTABLE, &mapping->flags);
 247}
 248
 249static inline void mapping_clear_unevictable(struct address_space *mapping)
 250{
 251	clear_bit(AS_UNEVICTABLE, &mapping->flags);
 252}
 253
 254static inline bool mapping_unevictable(struct address_space *mapping)
 255{
 256	return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags);
 257}
 258
 259static inline void mapping_set_exiting(struct address_space *mapping)
 260{
 261	set_bit(AS_EXITING, &mapping->flags);
 262}
 263
 264static inline int mapping_exiting(struct address_space *mapping)
 265{
 266	return test_bit(AS_EXITING, &mapping->flags);
 267}
 268
 269static inline void mapping_set_no_writeback_tags(struct address_space *mapping)
 270{
 271	set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
 272}
 273
 274static inline int mapping_use_writeback_tags(struct address_space *mapping)
 275{
 276	return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags);
 277}
 278
 279static inline bool mapping_release_always(const struct address_space *mapping)
 280{
 281	return test_bit(AS_RELEASE_ALWAYS, &mapping->flags);
 282}
 283
 284static inline void mapping_set_release_always(struct address_space *mapping)
 285{
 286	set_bit(AS_RELEASE_ALWAYS, &mapping->flags);
 287}
 288
 289static inline void mapping_clear_release_always(struct address_space *mapping)
 290{
 291	clear_bit(AS_RELEASE_ALWAYS, &mapping->flags);
 292}
 293
 294static inline bool mapping_stable_writes(const struct address_space *mapping)
 295{
 296	return test_bit(AS_STABLE_WRITES, &mapping->flags);
 297}
 298
 299static inline void mapping_set_stable_writes(struct address_space *mapping)
 300{
 301	set_bit(AS_STABLE_WRITES, &mapping->flags);
 302}
 303
 304static inline void mapping_clear_stable_writes(struct address_space *mapping)
 305{
 306	clear_bit(AS_STABLE_WRITES, &mapping->flags);
 307}
 308
 309static inline gfp_t mapping_gfp_mask(struct address_space * mapping)
 310{
 311	return mapping->gfp_mask;
 312}
 313
 314/* Restricts the given gfp_mask to what the mapping allows. */
 315static inline gfp_t mapping_gfp_constraint(struct address_space *mapping,
 316		gfp_t gfp_mask)
 317{
 318	return mapping_gfp_mask(mapping) & gfp_mask;
 319}
 320
/*
 * Store @mask as the gfp mask of mapping @m.
 *
 * This is non-atomic (a plain store).  Only to be used before the mapping
 * is activated.  Probably needs a barrier...
 */
static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
{
	m->gfp_mask = mask;
}
 329
 330/**
 331 * mapping_set_large_folios() - Indicate the file supports large folios.
 332 * @mapping: The file.
 333 *
 334 * The filesystem should call this function in its inode constructor to
 335 * indicate that the VFS can use large folios to cache the contents of
 336 * the file.
 337 *
 338 * Context: This should not be called while the inode is active as it
 339 * is non-atomic.
 340 */
 341static inline void mapping_set_large_folios(struct address_space *mapping)
 342{
 343	__set_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 344}
 345
 346/*
 347 * Large folio support currently depends on THP.  These dependencies are
 348 * being worked on but are not yet fixed.
 349 */
 350static inline bool mapping_large_folio_support(struct address_space *mapping)
 351{
 352	return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
 353		test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags);
 354}
 355
 356static inline int filemap_nr_thps(struct address_space *mapping)
 357{
 358#ifdef CONFIG_READ_ONLY_THP_FOR_FS
 359	return atomic_read(&mapping->nr_thps);
 360#else
 361	return 0;
 362#endif
 363}
 364
 365static inline void filemap_nr_thps_inc(struct address_space *mapping)
 366{
 367#ifdef CONFIG_READ_ONLY_THP_FOR_FS
 368	if (!mapping_large_folio_support(mapping))
 369		atomic_inc(&mapping->nr_thps);
 370#else
 371	WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
 372#endif
 373}
 374
 375static inline void filemap_nr_thps_dec(struct address_space *mapping)
 376{
 377#ifdef CONFIG_READ_ONLY_THP_FOR_FS
 378	if (!mapping_large_folio_support(mapping))
 379		atomic_dec(&mapping->nr_thps);
 380#else
 381	WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0);
 382#endif
 383}
 384
 385struct address_space *page_mapping(struct page *);
 386struct address_space *folio_mapping(struct folio *);
 387struct address_space *swapcache_mapping(struct folio *);
 388
 389/**
 390 * folio_file_mapping - Find the mapping this folio belongs to.
 391 * @folio: The folio.
 392 *
 393 * For folios which are in the page cache, return the mapping that this
 394 * page belongs to.  Folios in the swap cache return the mapping of the
 395 * swap file or swap device where the data is stored.  This is different
 396 * from the mapping returned by folio_mapping().  The only reason to
 397 * use it is if, like NFS, you return 0 from ->activate_swapfile.
 398 *
 399 * Do not call this for folios which aren't in the page cache or swap cache.
 400 */
 401static inline struct address_space *folio_file_mapping(struct folio *folio)
 402{
 403	if (unlikely(folio_test_swapcache(folio)))
 404		return swapcache_mapping(folio);
 405
 406	return folio->mapping;
 407}
 408
 409/**
 410 * folio_flush_mapping - Find the file mapping this folio belongs to.
 411 * @folio: The folio.
 412 *
 413 * For folios which are in the page cache, return the mapping that this
 414 * page belongs to.  Anonymous folios return NULL, even if they're in
 415 * the swap cache.  Other kinds of folio also return NULL.
 416 *
 417 * This is ONLY used by architecture cache flushing code.  If you aren't
 418 * writing cache flushing code, you want either folio_mapping() or
 419 * folio_file_mapping().
 420 */
 421static inline struct address_space *folio_flush_mapping(struct folio *folio)
 422{
 423	if (unlikely(folio_test_swapcache(folio)))
 424		return NULL;
 425
 426	return folio_mapping(folio);
 427}
 428
 429static inline struct address_space *page_file_mapping(struct page *page)
 430{
 431	return folio_file_mapping(page_folio(page));
 432}
 433
 434/**
 435 * folio_inode - Get the host inode for this folio.
 436 * @folio: The folio.
 437 *
 438 * For folios which are in the page cache, return the inode that this folio
 439 * belongs to.
 440 *
 441 * Do not call this for folios which aren't in the page cache.
 442 */
 443static inline struct inode *folio_inode(struct folio *folio)
 444{
 445	return folio->mapping->host;
 446}
 447
/**
 * folio_attach_private - Attach private data to a folio.
 * @folio: Folio to attach data to.
 * @data: Data to attach to folio.
 *
 * Attaching private data to a folio increments the folio's reference count.
 * The data must be detached before the folio will be freed.
 */
static inline void folio_attach_private(struct folio *folio, void *data)
{
	/* Reference taken here is dropped again by folio_detach_private() */
	folio_get(folio);
	folio->private = data;
	/* Flag the folio as carrying private data only after ->private is stored */
	folio_set_private(folio);
}
 462
 463/**
 464 * folio_change_private - Change private data on a folio.
 465 * @folio: Folio to change the data on.
 466 * @data: Data to set on the folio.
 467 *
 468 * Change the private data attached to a folio and return the old
 469 * data.  The page must previously have had data attached and the data
 470 * must be detached before the folio will be freed.
 471 *
 472 * Return: Data that was previously attached to the folio.
 473 */
 474static inline void *folio_change_private(struct folio *folio, void *data)
 475{
 476	void *old = folio_get_private(folio);
 477
 478	folio->private = data;
 479	return old;
 480}
 481
 482/**
 483 * folio_detach_private - Detach private data from a folio.
 484 * @folio: Folio to detach data from.
 485 *
 486 * Removes the data that was previously attached to the folio and decrements
 487 * the refcount on the page.
 488 *
 489 * Return: Data that was attached to the folio.
 490 */
 491static inline void *folio_detach_private(struct folio *folio)
 492{
 493	void *data = folio_get_private(folio);
 494
 495	if (!folio_test_private(folio))
 496		return NULL;
 497	folio_clear_private(folio);
 498	folio->private = NULL;
 499	folio_put(folio);
 500
 501	return data;
 502}
 503
 504static inline void attach_page_private(struct page *page, void *data)
 505{
 506	folio_attach_private(page_folio(page), data);
 507}
 508
 509static inline void *detach_page_private(struct page *page)
 510{
 511	return folio_detach_private(page_folio(page));
 512}
 513
 514/*
 515 * There are some parts of the kernel which assume that PMD entries
 516 * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
 517 * limit the maximum allocation order to PMD size.  I'm not aware of any
 518 * assumptions about maximum order if THP are disabled, but 8 seems like
 519 * a good order (that's 1MB if you're using 4kB pages)
 520 */
 521#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 522#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
 523#else
 524#define MAX_PAGECACHE_ORDER	8
 525#endif
 526
 527#ifdef CONFIG_NUMA
 528struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
 529#else
 530static inline struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order)
 531{
 532	return folio_alloc(gfp, order);
 533}
 534#endif
 535
 536static inline struct page *__page_cache_alloc(gfp_t gfp)
 537{
 538	return &filemap_alloc_folio(gfp, 0)->page;
 539}
 540
 541static inline struct page *page_cache_alloc(struct address_space *x)
 542{
 543	return __page_cache_alloc(mapping_gfp_mask(x));
 544}
 545
 546static inline gfp_t readahead_gfp_mask(struct address_space *x)
 547{
 548	return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN;
 549}
 550
 551typedef int filler_t(struct file *, struct folio *);
 552
 553pgoff_t page_cache_next_miss(struct address_space *mapping,
 554			     pgoff_t index, unsigned long max_scan);
 555pgoff_t page_cache_prev_miss(struct address_space *mapping,
 556			     pgoff_t index, unsigned long max_scan);
 557
 558/**
 559 * typedef fgf_t - Flags for getting folios from the page cache.
 560 *
 561 * Most users of the page cache will not need to use these flags;
 562 * there are convenience functions such as filemap_get_folio() and
 563 * filemap_lock_folio().  For users which need more control over exactly
 564 * what is done with the folios, these flags to __filemap_get_folio()
 565 * are available.
 566 *
 567 * * %FGP_ACCESSED - The folio will be marked accessed.
 568 * * %FGP_LOCK - The folio is returned locked.
 569 * * %FGP_CREAT - If no folio is present then a new folio is allocated,
 570 *   added to the page cache and the VM's LRU list.  The folio is
 571 *   returned locked.
 572 * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
 573 *   folio is already in cache.  If the folio was allocated, unlock it
 574 *   before returning so the caller can do the same dance.
 575 * * %FGP_WRITE - The folio will be written to by the caller.
 576 * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
 577 * * %FGP_NOWAIT - Don't block on the folio lock.
 578 * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
 579 * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin()
 580 *   implementation.
 581 */
 582typedef unsigned int __bitwise fgf_t;
 583
 584#define FGP_ACCESSED		((__force fgf_t)0x00000001)
 585#define FGP_LOCK		((__force fgf_t)0x00000002)
 586#define FGP_CREAT		((__force fgf_t)0x00000004)
 587#define FGP_WRITE		((__force fgf_t)0x00000008)
 588#define FGP_NOFS		((__force fgf_t)0x00000010)
 589#define FGP_NOWAIT		((__force fgf_t)0x00000020)
 590#define FGP_FOR_MMAP		((__force fgf_t)0x00000040)
 591#define FGP_STABLE		((__force fgf_t)0x00000080)
 592#define FGF_GET_ORDER(fgf)	(((__force unsigned)fgf) >> 26)	/* top 6 bits */
 593
 594#define FGP_WRITEBEGIN		(FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
 595
 596/**
 597 * fgf_set_order - Encode a length in the fgf_t flags.
 598 * @size: The suggested size of the folio to create.
 599 *
 600 * The caller of __filemap_get_folio() can use this to suggest a preferred
 601 * size for the folio that is created.  If there is already a folio at
 602 * the index, it will be returned, no matter what its size.  If a folio
 603 * is freshly created, it may be of a different size than requested
 604 * due to alignment constraints, memory pressure, or the presence of
 605 * other folios at nearby indices.
 606 */
 607static inline fgf_t fgf_set_order(size_t size)
 608{
 609	unsigned int shift = ilog2(size);
 610
 611	if (shift <= PAGE_SHIFT)
 612		return 0;
 613	return (__force fgf_t)((shift - PAGE_SHIFT) << 26);
 614}
 615
/*
 * Out-of-line lookup routines.  The static inline helpers in this header
 * call __filemap_get_folio() and pagecache_get_page() with fixed
 * combinations of FGP_* flags and gfp masks.
 */
void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
		fgf_t fgp_flags, gfp_t gfp);
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
		fgf_t fgp_flags, gfp_t gfp);
 621
 622/**
 623 * filemap_get_folio - Find and get a folio.
 624 * @mapping: The address_space to search.
 625 * @index: The page index.
 626 *
 627 * Looks up the page cache entry at @mapping & @index.  If a folio is
 628 * present, it is returned with an increased refcount.
 629 *
 630 * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for
 631 * this index.  Will not return a shadow, swap or DAX entry.
 632 */
 633static inline struct folio *filemap_get_folio(struct address_space *mapping,
 634					pgoff_t index)
 635{
 636	return __filemap_get_folio(mapping, index, 0, 0);
 637}
 638
 639/**
 640 * filemap_lock_folio - Find and lock a folio.
 641 * @mapping: The address_space to search.
 642 * @index: The page index.
 643 *
 644 * Looks up the page cache entry at @mapping & @index.  If a folio is
 645 * present, it is returned locked with an increased refcount.
 646 *
 647 * Context: May sleep.
 648 * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for
 649 * this index.  Will not return a shadow, swap or DAX entry.
 650 */
 651static inline struct folio *filemap_lock_folio(struct address_space *mapping,
 652					pgoff_t index)
 653{
 654	return __filemap_get_folio(mapping, index, FGP_LOCK, 0);
 655}
 656
 657/**
 658 * filemap_grab_folio - grab a folio from the page cache
 659 * @mapping: The address space to search
 660 * @index: The page index
 661 *
 662 * Looks up the page cache entry at @mapping & @index. If no folio is found,
 663 * a new folio is created. The folio is locked, marked as accessed, and
 664 * returned.
 665 *
 666 * Return: A found or created folio. ERR_PTR(-ENOMEM) if no folio is found
 667 * and failed to create a folio.
 668 */
 669static inline struct folio *filemap_grab_folio(struct address_space *mapping,
 670					pgoff_t index)
 671{
 672	return __filemap_get_folio(mapping, index,
 673			FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
 674			mapping_gfp_mask(mapping));
 675}
 676
 677/**
 678 * find_get_page - find and get a page reference
 679 * @mapping: the address_space to search
 680 * @offset: the page index
 681 *
 682 * Looks up the page cache slot at @mapping & @offset.  If there is a
 683 * page cache page, it is returned with an increased refcount.
 684 *
 685 * Otherwise, %NULL is returned.
 686 */
 687static inline struct page *find_get_page(struct address_space *mapping,
 688					pgoff_t offset)
 689{
 690	return pagecache_get_page(mapping, offset, 0, 0);
 691}
 692
 693static inline struct page *find_get_page_flags(struct address_space *mapping,
 694					pgoff_t offset, fgf_t fgp_flags)
 695{
 696	return pagecache_get_page(mapping, offset, fgp_flags, 0);
 697}
 698
 699/**
 700 * find_lock_page - locate, pin and lock a pagecache page
 701 * @mapping: the address_space to search
 702 * @index: the page index
 703 *
 704 * Looks up the page cache entry at @mapping & @index.  If there is a
 705 * page cache page, it is returned locked and with an increased
 706 * refcount.
 707 *
 708 * Context: May sleep.
 709 * Return: A struct page or %NULL if there is no page in the cache for this
 710 * index.
 711 */
 712static inline struct page *find_lock_page(struct address_space *mapping,
 713					pgoff_t index)
 714{
 715	return pagecache_get_page(mapping, index, FGP_LOCK, 0);
 716}
 717
 718/**
 719 * find_or_create_page - locate or add a pagecache page
 720 * @mapping: the page's address_space
 721 * @index: the page's index into the mapping
 722 * @gfp_mask: page allocation mode
 723 *
 724 * Looks up the page cache slot at @mapping & @offset.  If there is a
 725 * page cache page, it is returned locked and with an increased
 726 * refcount.
 727 *
 728 * If the page is not present, a new page is allocated using @gfp_mask
 729 * and added to the page cache and the VM's LRU list.  The page is
 730 * returned locked and with an increased refcount.
 731 *
 732 * On memory exhaustion, %NULL is returned.
 733 *
 734 * find_or_create_page() may sleep, even if @gfp_flags specifies an
 735 * atomic allocation!
 736 */
 737static inline struct page *find_or_create_page(struct address_space *mapping,
 738					pgoff_t index, gfp_t gfp_mask)
 739{
 740	return pagecache_get_page(mapping, index,
 741					FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
 742					gfp_mask);
 743}
 744
 745/**
 746 * grab_cache_page_nowait - returns locked page at given index in given cache
 747 * @mapping: target address_space
 748 * @index: the page index
 749 *
 750 * Same as grab_cache_page(), but do not wait if the page is unavailable.
 751 * This is intended for speculative data generators, where the data can
 752 * be regenerated if the page couldn't be grabbed.  This routine should
 753 * be safe to call while holding the lock for another page.
 754 *
 755 * Clear __GFP_FS when allocating the page to avoid recursion into the fs
 756 * and deadlock against the caller's locked page.
 757 */
 758static inline struct page *grab_cache_page_nowait(struct address_space *mapping,
 759				pgoff_t index)
 760{
 761	return pagecache_get_page(mapping, index,
 762			FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
 763			mapping_gfp_mask(mapping));
 764}
 765
/* Index used by folio_index() for swap cache folios: __page_file_index() of the folio's embedded page. */
#define swapcache_index(folio)	__page_file_index(&(folio)->page)
 767
 768/**
 769 * folio_index - File index of a folio.
 770 * @folio: The folio.
 771 *
 772 * For a folio which is either in the page cache or the swap cache,
 773 * return its index within the address_space it belongs to.  If you know
 774 * the page is definitely in the page cache, you can look at the folio's
 775 * index directly.
 776 *
 777 * Return: The index (offset in units of pages) of a folio in its file.
 778 */
 779static inline pgoff_t folio_index(struct folio *folio)
 780{
 781        if (unlikely(folio_test_swapcache(folio)))
 782                return swapcache_index(folio);
 783        return folio->index;
 784}
 785
 786/**
 787 * folio_next_index - Get the index of the next folio.
 788 * @folio: The current folio.
 789 *
 790 * Return: The index of the folio which follows this folio in the file.
 791 */
 792static inline pgoff_t folio_next_index(struct folio *folio)
 793{
 794	return folio->index + folio_nr_pages(folio);
 795}
 796
 797/**
 798 * folio_file_page - The page for a particular index.
 799 * @folio: The folio which contains this index.
 800 * @index: The index we want to look up.
 801 *
 802 * Sometimes after looking up a folio in the page cache, we need to
 803 * obtain the specific page for an index (eg a page fault).
 804 *
 805 * Return: The page containing the file data for this index.
 806 */
 807static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
 808{
 809	return folio_page(folio, index & (folio_nr_pages(folio) - 1));
 810}
 811
 812/**
 813 * folio_contains - Does this folio contain this index?
 814 * @folio: The folio.
 815 * @index: The page index within the file.
 816 *
 817 * Context: The caller should have the page locked in order to prevent
 818 * (eg) shmem from moving the page between the page cache and swap cache
 819 * and changing its index in the middle of the operation.
 820 * Return: true or false.
 821 */
 822static inline bool folio_contains(struct folio *folio, pgoff_t index)
 823{
 824	return index - folio_index(folio) < folio_nr_pages(folio);
 825}
 826
 827/*
 828 * Given the page we found in the page cache, return the page corresponding
 829 * to this index in the file
 830 */
 831static inline struct page *find_subpage(struct page *head, pgoff_t index)
 832{
 833	/* HugeTLBfs wants the head page regardless */
 834	if (PageHuge(head))
 835		return head;
 836
 837	return head + (index & (thp_nr_pages(head) - 1));
 838}
 839
/*
 * Batched lookups: each takes a pointer to the start index, an end index
 * and a folio_batch to work with.
 */
unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
		pgoff_t end, struct folio_batch *fbatch);
unsigned filemap_get_folios_contig(struct address_space *mapping,
		pgoff_t *start, pgoff_t end, struct folio_batch *fbatch);
unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
		pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch);

struct page *grab_cache_page_write_begin(struct address_space *mapping,
			pgoff_t index);
 849
 850/*
 851 * Returns locked page at given index in given cache, creating it if needed.
 852 */
 853static inline struct page *grab_cache_page(struct address_space *mapping,
 854								pgoff_t index)
 855{
 856	return find_or_create_page(mapping, index, mapping_gfp_mask(mapping));
 857}
 858
 859struct folio *read_cache_folio(struct address_space *, pgoff_t index,
 860		filler_t *filler, struct file *file);
 861struct folio *mapping_read_folio_gfp(struct address_space *, pgoff_t index,
 862		gfp_t flags);
 863struct page *read_cache_page(struct address_space *, pgoff_t index,
 864		filler_t *filler, struct file *file);
 865extern struct page * read_cache_page_gfp(struct address_space *mapping,
 866				pgoff_t index, gfp_t gfp_mask);
 867
 868static inline struct page *read_mapping_page(struct address_space *mapping,
 869				pgoff_t index, struct file *file)
 870{
 871	return read_cache_page(mapping, index, NULL, file);
 872}
 873
 874static inline struct folio *read_mapping_folio(struct address_space *mapping,
 875				pgoff_t index, struct file *file)
 876{
 877	return read_cache_folio(mapping, index, NULL, file);
 878}
 879
 880/*
 881 * Get the offset in PAGE_SIZE (even for hugetlb pages).
 882 */
 883static inline pgoff_t page_to_pgoff(struct page *page)
 884{
 885	struct page *head;
 886
 887	if (likely(!PageTransTail(page)))
 888		return page->index;
 889
 890	head = compound_head(page);
 891	/*
 892	 *  We don't initialize ->index for tail pages: calculate based on
 893	 *  head page
 894	 */
 895	return head->index + page - head;
 896}
 897
 898/*
 899 * Return byte-offset into filesystem object for page.
 900 */
 901static inline loff_t page_offset(struct page *page)
 902{
 903	return ((loff_t)page->index) << PAGE_SHIFT;
 904}
 905
 906static inline loff_t page_file_offset(struct page *page)
 907{
 908	return ((loff_t)page_index(page)) << PAGE_SHIFT;
 909}
 910
 911/**
 912 * folio_pos - Returns the byte position of this folio in its file.
 913 * @folio: The folio.
 914 */
 915static inline loff_t folio_pos(struct folio *folio)
 916{
 917	return page_offset(&folio->page);
 918}
 919
 920/**
 921 * folio_file_pos - Returns the byte position of this folio in its file.
 922 * @folio: The folio.
 923 *
 924 * This differs from folio_pos() for folios which belong to a swap file.
 925 * NFS is the only filesystem today which needs to use folio_file_pos().
 926 */
 927static inline loff_t folio_file_pos(struct folio *folio)
 928{
 929	return page_file_offset(&folio->page);
 930}
 931
 932/*
 933 * Get the offset in PAGE_SIZE (even for hugetlb folios).
 934 */
 935static inline pgoff_t folio_pgoff(struct folio *folio)
 936{
 937	return folio->index;
 938}
 939
 940static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 941					unsigned long address)
 942{
 943	pgoff_t pgoff;
 944	pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
 945	pgoff += vma->vm_pgoff;
 946	return pgoff;
 947}
 948
/* Key that wake_page_match() compares against each queued waiter. */
struct wait_page_key {
	struct folio *folio;	/* folio the wakeup is for */
	int bit_nr;		/* flag bit the wakeup is for */
	int page_match;		/* set to 1 by wake_page_match() on a folio match */
};
 954
/* One waiter; wake_page_match() checks its folio and bit_nr against a key. */
struct wait_page_queue {
	struct folio *folio;	/* folio being waited on */
	int bit_nr;		/* flag bit being waited on */
	wait_queue_entry_t wait;	/* embedded wait-queue entry */
};
 960
 961static inline bool wake_page_match(struct wait_page_queue *wait_page,
 962				  struct wait_page_key *key)
 963{
 964	if (wait_page->folio != key->folio)
 965	       return false;
 966	key->page_match = 1;
 967
 968	if (wait_page->bit_nr != key->bit_nr)
 969		return false;
 970
 971	return true;
 972}
 973
/*
 * Out-of-line halves of the locking API.  The three __folio_lock*()
 * functions are the contended paths: the inline wrappers in this header
 * call them only after folio_trylock() has failed.
 */
void __folio_lock(struct folio *folio);
int __folio_lock_killable(struct folio *folio);
vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf);
void unlock_page(struct page *page);
void folio_unlock(struct folio *folio);
 979
 980/**
 981 * folio_trylock() - Attempt to lock a folio.
 982 * @folio: The folio to attempt to lock.
 983 *
 984 * Sometimes it is undesirable to wait for a folio to be unlocked (eg
 985 * when the locks are being taken in the wrong order, or if making
 986 * progress through a batch of folios is more important than processing
 987 * them in order).  Usually folio_lock() is the correct function to call.
 988 *
 989 * Context: Any context.
 990 * Return: Whether the lock was successfully acquired.
 991 */
 992static inline bool folio_trylock(struct folio *folio)
 993{
 994	return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0)));
 995}
 996
 997/*
 998 * Return true if the page was successfully locked
 999 */
1000static inline int trylock_page(struct page *page)
1001{
1002	return folio_trylock(page_folio(page));
1003}
1004
/**
 * folio_lock() - Lock this folio.
 * @folio: The folio to lock.
 *
 * The folio lock protects against many things, probably more than it
 * should.  Its primary use is while a folio is being brought uptodate,
 * whether from its backing file or from swap.  It is also held while a
 * folio is being truncated from its address_space, which means holding
 * the lock is enough to keep folio->mapping stable.
 *
 * write() also holds the folio lock while modifying the page, to provide
 * POSIX atomicity guarantees (provided the write does not cross a page
 * boundary).  Other modifications to the folio's data, eg DMA and stores
 * to mapped pages, do not take the lock and can race with writes.
 *
 * Context: May sleep.  When locking two or more folios from the same
 * address_space, take them in order of ascending index.  When they belong
 * to different address_spaces, first lock the folio whose address_space
 * has the lowest address in memory.
 */
static inline void folio_lock(struct folio *folio)
{
	might_sleep();

	/* Uncontended case: the trylock is all we need. */
	if (folio_trylock(folio))
		return;

	__folio_lock(folio);
}
1033
/**
 * lock_page() - Lock the folio containing this page.
 * @page: The page to lock.
 *
 * What the lock protects is described at folio_lock().  This is a legacy
 * interface; new code should probably call folio_lock() instead.
 *
 * Context: May sleep.  All pages of a folio share one lock, so do not
 * attempt to lock two pages which share a folio.
 */
static inline void lock_page(struct page *page)
{
	struct folio *folio;

	might_sleep();

	folio = page_folio(page);
	if (folio_trylock(folio))
		return;

	__folio_lock(folio);
}
1054
/**
 * folio_lock_killable() - Lock this folio, interruptible by a fatal signal.
 * @folio: The folio to lock.
 *
 * Behaves like folio_lock(), except that a fatal signal can interrupt the
 * sleep taken while waiting for the lock.
 *
 * Context: May sleep; see folio_lock().
 * Return: 0 if the lock was acquired; -EINTR if a fatal signal was received.
 */
static inline int folio_lock_killable(struct folio *folio)
{
	might_sleep();

	if (folio_trylock(folio))
		return 0;

	return __folio_lock_killable(folio);
}
1072
1073/*
1074 * folio_lock_or_retry - Lock the folio, unless this would block and the
1075 * caller indicated that it can handle a retry.
1076 *
1077 * Return value and mmap_lock implications depend on flags; see
1078 * __folio_lock_or_retry().
1079 */
1080static inline vm_fault_t folio_lock_or_retry(struct folio *folio,
1081					     struct vm_fault *vmf)
1082{
1083	might_sleep();
1084	if (!folio_trylock(folio))
1085		return __folio_lock_or_retry(folio, vmf);
1086	return 0;
1087}
1088
/*
 * These are exported only for the benefit of folio_wait_locked(),
 * folio_wait_writeback() and similar wrappers; do not call them directly.
 * Both take the folio and the number of the flag bit to wait on.
 */
void folio_wait_bit(struct folio *folio, int bit_nr);
int folio_wait_bit_killable(struct folio *folio, int bit_nr);
1095
1096/* 
1097 * Wait for a folio to be unlocked.
1098 *
1099 * This must be called with the caller "holding" the folio,
1100 * ie with increased folio reference count so that the folio won't
1101 * go away during the wait.
1102 */
1103static inline void folio_wait_locked(struct folio *folio)
1104{
1105	if (folio_test_locked(folio))
1106		folio_wait_bit(folio, PG_locked);
1107}
1108
1109static inline int folio_wait_locked_killable(struct folio *folio)
1110{
1111	if (!folio_test_locked(folio))
1112		return 0;
1113	return folio_wait_bit_killable(folio, PG_locked);
1114}
1115
/* Page wrapper: folio_wait_locked() on the folio containing @page. */
static inline void wait_on_page_locked(struct page *page)
{
	struct folio *folio = page_folio(page);

	folio_wait_locked(folio);
}
1120
1121void folio_end_read(struct folio *folio, bool success);
1122void wait_on_page_writeback(struct page *page);
1123void folio_wait_writeback(struct folio *folio);
1124int folio_wait_writeback_killable(struct folio *folio);
1125void end_page_writeback(struct page *page);
1126void folio_end_writeback(struct folio *folio);
1127void wait_for_stable_page(struct page *page);
1128void folio_wait_stable(struct folio *folio);
1129void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn);
/* Page wrapper: __folio_mark_dirty() on the folio containing @page. */
static inline void __set_page_dirty(struct page *page,
		struct address_space *mapping, int warn)
{
	struct folio *folio = page_folio(page);

	__folio_mark_dirty(folio, mapping, warn);
}
void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb);
void __folio_cancel_dirty(struct folio *folio);

/*
 * Cancel the dirty state of @folio.  The out-of-line __folio_cancel_dirty()
 * is called only when the folio tests dirty, which avoids atomic ops,
 * locking, etc. when they are not actually needed.
 */
static inline void folio_cancel_dirty(struct folio *folio)
{
	if (!folio_test_dirty(folio))
		return;

	__folio_cancel_dirty(folio);
}
/* Dirty-state and invalidation entry points defined outside this header. */
bool folio_clear_dirty_for_io(struct folio *folio);
bool clear_page_dirty_for_io(struct page *page);
void folio_invalidate(struct folio *folio, size_t offset, size_t length);
int __set_page_dirty_nobuffers(struct page *page);
bool noop_dirty_folio(struct address_space *mapping, struct folio *folio);
1148
/*
 * With CONFIG_MIGRATION this is a real function.  Without it the name
 * expands to NULL, so code that stores it in a function pointer still
 * compiles in either configuration.
 */
#ifdef CONFIG_MIGRATION
int filemap_migrate_folio(struct address_space *mapping, struct folio *dst,
		struct folio *src, enum migrate_mode mode);
#else
#define filemap_migrate_folio NULL
#endif
/* NOTE(review): presumably end/wait helpers for the PG_private_2 flag -- confirm. */
void folio_end_private_2(struct folio *folio);
void folio_wait_private_2(struct folio *folio);
int folio_wait_private_2_killable(struct folio *folio);

/*
 * Add an arbitrary waiter to a folio's wait queue
 */
void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter);

/*
 * Fault in a userspace address range.  Each function takes the start of
 * the range and its size in bytes.
 */
size_t fault_in_writeable(char __user *uaddr, size_t size);
size_t fault_in_subpage_writeable(char __user *uaddr, size_t size);
size_t fault_in_safe_writeable(const char __user *uaddr, size_t size);
size_t fault_in_readable(const char __user *uaddr, size_t size);
1171
1172int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
1173		pgoff_t index, gfp_t gfp);
1174int filemap_add_folio(struct address_space *mapping, struct folio *folio,
1175		pgoff_t index, gfp_t gfp);
1176void filemap_remove_folio(struct folio *folio);
1177void __filemap_remove_folio(struct folio *folio, void *shadow);
1178void replace_page_cache_folio(struct folio *old, struct folio *new);
1179void delete_from_page_cache_batch(struct address_space *mapping,
1180				  struct folio_batch *fbatch);
1181bool filemap_release_folio(struct folio *folio, gfp_t gfp);
1182loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
1183		int whence);
1184
1185/* Must be non-static for BPF error injection */
1186int __filemap_add_folio(struct address_space *mapping, struct folio *folio,
1187		pgoff_t index, gfp_t gfp, void **shadowp);
1188
1189bool filemap_range_has_writeback(struct address_space *mapping,
1190				 loff_t start_byte, loff_t end_byte);
1191
1192/**
1193 * filemap_range_needs_writeback - check if range potentially needs writeback
1194 * @mapping:           address space within which to check
1195 * @start_byte:        offset in bytes where the range starts
1196 * @end_byte:          offset in bytes where the range ends (inclusive)
1197 *
1198 * Find at least one page in the range supplied, usually used to check if
1199 * direct writing in this range will trigger a writeback. Used by O_DIRECT
1200 * read/write with IOCB_NOWAIT, to see if the caller needs to do
1201 * filemap_write_and_wait_range() before proceeding.
1202 *
1203 * Return: %true if the caller should do filemap_write_and_wait_range() before
1204 * doing O_DIRECT to a page in this range, %false otherwise.
1205 */
1206static inline bool filemap_range_needs_writeback(struct address_space *mapping,
1207						 loff_t start_byte,
1208						 loff_t end_byte)
1209{
1210	if (!mapping->nrpages)
1211		return false;
1212	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
1213	    !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
1214		return false;
1215	return filemap_range_has_writeback(mapping, start_byte, end_byte);
1216}
1217
/**
 * struct readahead_control - Describes a readahead request.
 *
 * A readahead request is for consecutive pages.  Filesystems which
 * implement the ->readahead method should call readahead_page() or
 * readahead_page_batch() in a loop and attempt to start I/O against
 * each page in the request.
 *
 * Most of the fields in this struct are private and should be accessed
 * by the functions below.
 *
 * @file: The file, used primarily by network filesystems for authentication.
 *	  May be NULL if invoked internally by the filesystem.
 * @mapping: Readahead this filesystem object.
 * @ra: File readahead state.  May be NULL.
 */
struct readahead_control {
	struct file *file;
	struct address_space *mapping;
	struct file_ra_state *ra;
/* private: use the readahead_* accessors instead */
	pgoff_t _index;			/* see readahead_index() */
	unsigned int _nr_pages;		/* see readahead_count() */
	unsigned int _batch_count;	/* see readahead_batch_length() */
	bool _workingset;
	unsigned long _pflags;
};
1245
/*
 * Declare a struct readahead_control named @ractl with .file = @f,
 * .ra = @r, .mapping = @m and ._index = @i.  Note the argument order
 * (file, ra, mapping, index).  Members not named in the initializer are
 * zero-initialized.
 */
#define DEFINE_READAHEAD(ractl, f, r, m, i)				\
	struct readahead_control ractl = {				\
		.file = f,						\
		.mapping = m,						\
		.ra = r,						\
		._index = i,						\
	}
1253
/* 128 KiB expressed as a number of pages. */
#define VM_READAHEAD_PAGES	(SZ_128K / PAGE_SIZE)
1255
1256void page_cache_ra_unbounded(struct readahead_control *,
1257		unsigned long nr_to_read, unsigned long lookahead_count);
1258void page_cache_sync_ra(struct readahead_control *, unsigned long req_count);
1259void page_cache_async_ra(struct readahead_control *, struct folio *,
1260		unsigned long req_count);
1261void readahead_expand(struct readahead_control *ractl,
1262		      loff_t new_start, size_t new_len);
1263
1264/**
1265 * page_cache_sync_readahead - generic file readahead
1266 * @mapping: address_space which holds the pagecache and I/O vectors
1267 * @ra: file_ra_state which holds the readahead state
1268 * @file: Used by the filesystem for authentication.
1269 * @index: Index of first page to be read.
1270 * @req_count: Total number of pages being read by the caller.
1271 *
1272 * page_cache_sync_readahead() should be called when a cache miss happened:
1273 * it will submit the read.  The readahead logic may decide to piggyback more
1274 * pages onto the read request if access patterns suggest it will improve
1275 * performance.
1276 */
1277static inline
1278void page_cache_sync_readahead(struct address_space *mapping,
1279		struct file_ra_state *ra, struct file *file, pgoff_t index,
1280		unsigned long req_count)
1281{
1282	DEFINE_READAHEAD(ractl, file, ra, mapping, index);
1283	page_cache_sync_ra(&ractl, req_count);
1284}
1285
1286/**
1287 * page_cache_async_readahead - file readahead for marked pages
1288 * @mapping: address_space which holds the pagecache and I/O vectors
1289 * @ra: file_ra_state which holds the readahead state
1290 * @file: Used by the filesystem for authentication.
1291 * @folio: The folio at @index which triggered the readahead call.
1292 * @index: Index of first page to be read.
1293 * @req_count: Total number of pages being read by the caller.
1294 *
1295 * page_cache_async_readahead() should be called when a page is used which
1296 * is marked as PageReadahead; this is a marker to suggest that the application
1297 * has used up enough of the readahead window that we should start pulling in
1298 * more pages.
1299 */
1300static inline
1301void page_cache_async_readahead(struct address_space *mapping,
1302		struct file_ra_state *ra, struct file *file,
1303		struct folio *folio, pgoff_t index, unsigned long req_count)
1304{
1305	DEFINE_READAHEAD(ractl, file, ra, mapping, index);
1306	page_cache_async_ra(&ractl, folio, req_count);
1307}
1308
1309static inline struct folio *__readahead_folio(struct readahead_control *ractl)
1310{
1311	struct folio *folio;
1312
1313	BUG_ON(ractl->_batch_count > ractl->_nr_pages);
1314	ractl->_nr_pages -= ractl->_batch_count;
1315	ractl->_index += ractl->_batch_count;
1316
1317	if (!ractl->_nr_pages) {
1318		ractl->_batch_count = 0;
1319		return NULL;
1320	}
1321
1322	folio = xa_load(&ractl->mapping->i_pages, ractl->_index);
1323	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
1324	ractl->_batch_count = folio_nr_pages(folio);
1325
1326	return folio;
1327}
1328
1329/**
1330 * readahead_page - Get the next page to read.
1331 * @ractl: The current readahead request.
1332 *
1333 * Context: The page is locked and has an elevated refcount.  The caller
1334 * should decreases the refcount once the page has been submitted for I/O
1335 * and unlock the page once all I/O to that page has completed.
1336 * Return: A pointer to the next page, or %NULL if we are done.
1337 */
1338static inline struct page *readahead_page(struct readahead_control *ractl)
1339{
1340	struct folio *folio = __readahead_folio(ractl);
1341
1342	return &folio->page;
1343}
1344
1345/**
1346 * readahead_folio - Get the next folio to read.
1347 * @ractl: The current readahead request.
1348 *
1349 * Context: The folio is locked.  The caller should unlock the folio once
1350 * all I/O to that folio has completed.
1351 * Return: A pointer to the next folio, or %NULL if we are done.
1352 */
1353static inline struct folio *readahead_folio(struct readahead_control *ractl)
1354{
1355	struct folio *folio = __readahead_folio(ractl);
1356
1357	if (folio)
1358		folio_put(folio);
1359	return folio;
1360}
1361
1362static inline unsigned int __readahead_batch(struct readahead_control *rac,
1363		struct page **array, unsigned int array_sz)
1364{
1365	unsigned int i = 0;
1366	XA_STATE(xas, &rac->mapping->i_pages, 0);
1367	struct page *page;
1368
1369	BUG_ON(rac->_batch_count > rac->_nr_pages);
1370	rac->_nr_pages -= rac->_batch_count;
1371	rac->_index += rac->_batch_count;
1372	rac->_batch_count = 0;
1373
1374	xas_set(&xas, rac->_index);
1375	rcu_read_lock();
1376	xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) {
1377		if (xas_retry(&xas, page))
1378			continue;
1379		VM_BUG_ON_PAGE(!PageLocked(page), page);
1380		VM_BUG_ON_PAGE(PageTail(page), page);
1381		array[i++] = page;
1382		rac->_batch_count += thp_nr_pages(page);
1383		if (i == array_sz)
1384			break;
1385	}
1386	rcu_read_unlock();
1387
1388	return i;
1389}
1390
/**
 * readahead_page_batch - Get a batch of pages to read.
 * @rac: The current readahead request.
 * @array: An array of pointers to struct page.  Must be a real array, not
 *	   a pointer, because its capacity is taken with ARRAY_SIZE().
 *
 * Context: The pages are locked and have an elevated refcount.  The caller
 * should decrease the refcount once the page has been submitted for I/O
 * and unlock the page once all I/O to that page has completed.
 * Return: The number of pages placed in the array.  0 indicates the request
 * is complete.
 */
#define readahead_page_batch(rac, array)				\
	__readahead_batch(rac, array, ARRAY_SIZE(array))
1404
1405/**
1406 * readahead_pos - The byte offset into the file of this readahead request.
1407 * @rac: The readahead request.
1408 */
1409static inline loff_t readahead_pos(struct readahead_control *rac)
1410{
1411	return (loff_t)rac->_index * PAGE_SIZE;
1412}
1413
1414/**
1415 * readahead_length - The number of bytes in this readahead request.
1416 * @rac: The readahead request.
1417 */
1418static inline size_t readahead_length(struct readahead_control *rac)
1419{
1420	return rac->_nr_pages * PAGE_SIZE;
1421}
1422
1423/**
1424 * readahead_index - The index of the first page in this readahead request.
1425 * @rac: The readahead request.
1426 */
1427static inline pgoff_t readahead_index(struct readahead_control *rac)
1428{
1429	return rac->_index;
1430}
1431
1432/**
1433 * readahead_count - The number of pages in this readahead request.
1434 * @rac: The readahead request.
1435 */
1436static inline unsigned int readahead_count(struct readahead_control *rac)
1437{
1438	return rac->_nr_pages;
1439}
1440
1441/**
1442 * readahead_batch_length - The number of bytes in the current batch.
1443 * @rac: The readahead request.
1444 */
1445static inline size_t readahead_batch_length(struct readahead_control *rac)
1446{
1447	return rac->_batch_count * PAGE_SIZE;
1448}
1449
1450static inline unsigned long dir_pages(struct inode *inode)
1451{
1452	return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >>
1453			       PAGE_SHIFT;
1454}
1455
1456/**
1457 * folio_mkwrite_check_truncate - check if folio was truncated
1458 * @folio: the folio to check
1459 * @inode: the inode to check the folio against
1460 *
1461 * Return: the number of bytes in the folio up to EOF,
1462 * or -EFAULT if the folio was truncated.
1463 */
1464static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio,
1465					      struct inode *inode)
1466{
1467	loff_t size = i_size_read(inode);
1468	pgoff_t index = size >> PAGE_SHIFT;
1469	size_t offset = offset_in_folio(folio, size);
1470
1471	if (!folio->mapping)
1472		return -EFAULT;
1473
1474	/* folio is wholly inside EOF */
1475	if (folio_next_index(folio) - 1 < index)
1476		return folio_size(folio);
1477	/* folio is wholly past EOF */
1478	if (folio->index > index || !offset)
1479		return -EFAULT;
1480	/* folio is partially inside EOF */
1481	return offset;
1482}
1483
1484/**
1485 * page_mkwrite_check_truncate - check if page was truncated
1486 * @page: the page to check
1487 * @inode: the inode to check the page against
1488 *
1489 * Returns the number of bytes in the page up to EOF,
1490 * or -EFAULT if the page was truncated.
1491 */
1492static inline int page_mkwrite_check_truncate(struct page *page,
1493					      struct inode *inode)
1494{
1495	loff_t size = i_size_read(inode);
1496	pgoff_t index = size >> PAGE_SHIFT;
1497	int offset = offset_in_page(size);
1498
1499	if (page->mapping != inode->i_mapping)
1500		return -EFAULT;
1501
1502	/* page is wholly inside EOF */
1503	if (page->index < index)
1504		return PAGE_SIZE;
1505	/* page is wholly past EOF */
1506	if (page->index > index || !offset)
1507		return -EFAULT;
1508	/* page is partially inside EOF */
1509	return offset;
1510}
1511
1512/**
1513 * i_blocks_per_folio - How many blocks fit in this folio.
1514 * @inode: The inode which contains the blocks.
1515 * @folio: The folio.
1516 *
1517 * If the block size is larger than the size of this folio, return zero.
1518 *
1519 * Context: The caller should hold a refcount on the folio to prevent it
1520 * from being split.
1521 * Return: The number of filesystem blocks covered by this folio.
1522 */
1523static inline
1524unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio)
1525{
1526	return folio_size(folio) >> inode->i_blkbits;
1527}
1528
/* Page wrapper: i_blocks_per_folio() on the folio containing @page. */
static inline unsigned int i_blocks_per_page(struct inode *inode,
					     struct page *page)
{
	struct folio *folio = page_folio(page);

	return i_blocks_per_folio(inode, folio);
}
1534#endif /* _LINUX_PAGEMAP_H */