/* Listing metadata (source-viewer residue, not part of the header): at v6.4, 1291 lines, 36 kB. */
1/* SPDX-License-Identifier: GPL-2.0 */ 2#ifndef _LINUX_HUGETLB_H 3#define _LINUX_HUGETLB_H 4 5#include <linux/mm.h> 6#include <linux/mm_types.h> 7#include <linux/mmdebug.h> 8#include <linux/fs.h> 9#include <linux/hugetlb_inline.h> 10#include <linux/cgroup.h> 11#include <linux/page_ref.h> 12#include <linux/list.h> 13#include <linux/kref.h> 14#include <linux/pgtable.h> 15#include <linux/gfp.h> 16#include <linux/userfaultfd_k.h> 17 18struct ctl_table; 19struct user_struct; 20struct mmu_gather; 21struct node; 22 23#ifndef CONFIG_ARCH_HAS_HUGEPD 24typedef struct { unsigned long pd; } hugepd_t; 25#define is_hugepd(hugepd) (0) 26#define __hugepd(x) ((hugepd_t) { (x) }) 27#endif 28 29#ifdef CONFIG_HUGETLB_PAGE 30 31#include <linux/mempolicy.h> 32#include <linux/shm.h> 33#include <asm/tlbflush.h> 34 35/* 36 * For HugeTLB page, there are more metadata to save in the struct page. But 37 * the head struct page cannot meet our needs, so we have to abuse other tail 38 * struct page to store the metadata. 39 */ 40#define __NR_USED_SUBPAGE 3 41 42struct hugepage_subpool { 43 spinlock_t lock; 44 long count; 45 long max_hpages; /* Maximum huge pages or -1 if no maximum. */ 46 long used_hpages; /* Used count against maximum, includes */ 47 /* both allocated and reserved pages. */ 48 struct hstate *hstate; 49 long min_hpages; /* Minimum huge pages or -1 if no minimum. */ 50 long rsv_hpages; /* Pages reserved against global pool to */ 51 /* satisfy minimum size. */ 52}; 53 54struct resv_map { 55 struct kref refs; 56 spinlock_t lock; 57 struct list_head regions; 58 long adds_in_progress; 59 struct list_head region_cache; 60 long region_cache_count; 61#ifdef CONFIG_CGROUP_HUGETLB 62 /* 63 * On private mappings, the counter to uncharge reservations is stored 64 * here. If these fields are 0, then either the mapping is shared, or 65 * cgroup accounting is disabled for this resv_map. 
66 */ 67 struct page_counter *reservation_counter; 68 unsigned long pages_per_hpage; 69 struct cgroup_subsys_state *css; 70#endif 71}; 72 73/* 74 * Region tracking -- allows tracking of reservations and instantiated pages 75 * across the pages in a mapping. 76 * 77 * The region data structures are embedded into a resv_map and protected 78 * by a resv_map's lock. The set of regions within the resv_map represent 79 * reservations for huge pages, or huge pages that have already been 80 * instantiated within the map. The from and to elements are huge page 81 * indices into the associated mapping. from indicates the starting index 82 * of the region. to represents the first index past the end of the region. 83 * 84 * For example, a file region structure with from == 0 and to == 4 represents 85 * four huge pages in a mapping. It is important to note that the to element 86 * represents the first element past the end of the region. This is used in 87 * arithmetic as 4(to) - 0(from) = 4 huge pages in the region. 88 * 89 * Interval notation of the form [from, to) will be used to indicate that 90 * the endpoint from is inclusive and to is exclusive. 91 */ 92struct file_region { 93 struct list_head link; 94 long from; 95 long to; 96#ifdef CONFIG_CGROUP_HUGETLB 97 /* 98 * On shared mappings, each reserved region appears as a struct 99 * file_region in resv_map. These fields hold the info needed to 100 * uncharge each reservation. 
101 */ 102 struct page_counter *reservation_counter; 103 struct cgroup_subsys_state *css; 104#endif 105}; 106 107struct hugetlb_vma_lock { 108 struct kref refs; 109 struct rw_semaphore rw_sema; 110 struct vm_area_struct *vma; 111}; 112 113extern struct resv_map *resv_map_alloc(void); 114void resv_map_release(struct kref *ref); 115 116extern spinlock_t hugetlb_lock; 117extern int hugetlb_max_hstate __read_mostly; 118#define for_each_hstate(h) \ 119 for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++) 120 121struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, 122 long min_hpages); 123void hugepage_put_subpool(struct hugepage_subpool *spool); 124 125void hugetlb_dup_vma_private(struct vm_area_struct *vma); 126void clear_vma_resv_huge_pages(struct vm_area_struct *vma); 127int move_hugetlb_page_tables(struct vm_area_struct *vma, 128 struct vm_area_struct *new_vma, 129 unsigned long old_addr, unsigned long new_addr, 130 unsigned long len); 131int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, 132 struct vm_area_struct *, struct vm_area_struct *); 133struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, 134 unsigned long address, unsigned int flags); 135long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, 136 struct page **, struct vm_area_struct **, 137 unsigned long *, unsigned long *, long, unsigned int, 138 int *); 139void unmap_hugepage_range(struct vm_area_struct *, 140 unsigned long, unsigned long, struct page *, 141 zap_flags_t); 142void __unmap_hugepage_range_final(struct mmu_gather *tlb, 143 struct vm_area_struct *vma, 144 unsigned long start, unsigned long end, 145 struct page *ref_page, zap_flags_t zap_flags); 146void hugetlb_report_meminfo(struct seq_file *); 147int hugetlb_report_node_meminfo(char *buf, int len, int nid); 148void hugetlb_show_meminfo_node(int nid); 149unsigned long hugetlb_total_pages(void); 150vm_fault_t hugetlb_fault(struct mm_struct *mm, struct 
vm_area_struct *vma, 151 unsigned long address, unsigned int flags); 152#ifdef CONFIG_USERFAULTFD 153int hugetlb_mfill_atomic_pte(pte_t *dst_pte, 154 struct vm_area_struct *dst_vma, 155 unsigned long dst_addr, 156 unsigned long src_addr, 157 uffd_flags_t flags, 158 struct folio **foliop); 159#endif /* CONFIG_USERFAULTFD */ 160bool hugetlb_reserve_pages(struct inode *inode, long from, long to, 161 struct vm_area_struct *vma, 162 vm_flags_t vm_flags); 163long hugetlb_unreserve_pages(struct inode *inode, long start, long end, 164 long freed); 165bool isolate_hugetlb(struct folio *folio, struct list_head *list); 166int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); 167int get_huge_page_for_hwpoison(unsigned long pfn, int flags, 168 bool *migratable_cleared); 169void folio_putback_active_hugetlb(struct folio *folio); 170void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); 171void free_huge_page(struct page *page); 172void hugetlb_fix_reserve_counts(struct inode *inode); 173extern struct mutex *hugetlb_fault_mutex_table; 174u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); 175 176pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, 177 unsigned long addr, pud_t *pud); 178 179struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); 180 181extern int sysctl_hugetlb_shm_group; 182extern struct list_head huge_boot_pages; 183 184/* arch callbacks */ 185 186#ifndef CONFIG_HIGHPTE 187/* 188 * pte_offset_huge() and pte_alloc_huge() are helpers for those architectures 189 * which may go down to the lowest PTE level in their huge_pte_offset() and 190 * huge_pte_alloc(): to avoid reliance on pte_offset_map() without pte_unmap(). 
191 */ 192static inline pte_t *pte_offset_huge(pmd_t *pmd, unsigned long address) 193{ 194 return pte_offset_kernel(pmd, address); 195} 196static inline pte_t *pte_alloc_huge(struct mm_struct *mm, pmd_t *pmd, 197 unsigned long address) 198{ 199 return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address); 200} 201#endif 202 203pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 204 unsigned long addr, unsigned long sz); 205/* 206 * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE. 207 * Returns the pte_t* if found, or NULL if the address is not mapped. 208 * 209 * IMPORTANT: we should normally not directly call this function, instead 210 * this is only a common interface to implement arch-specific 211 * walker. Please use hugetlb_walk() instead, because that will attempt to 212 * verify the locking for you. 213 * 214 * Since this function will walk all the pgtable pages (including not only 215 * high-level pgtable page, but also PUD entry that can be unshared 216 * concurrently for VM_SHARED), the caller of this function should be 217 * responsible of its thread safety. One can follow this rule: 218 * 219 * (1) For private mappings: pmd unsharing is not possible, so holding the 220 * mmap_lock for either read or write is sufficient. Most callers 221 * already hold the mmap_lock, so normally, no special action is 222 * required. 223 * 224 * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged 225 * pgtable page can go away from under us! 
It can be done by a pmd 226 * unshare with a follow up munmap() on the other process), then we 227 * need either: 228 * 229 * (2.1) hugetlb vma lock read or write held, to make sure pmd unshare 230 * won't happen upon the range (it also makes sure the pte_t we 231 * read is the right and stable one), or, 232 * 233 * (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make 234 * sure even if unshare happened the racy unmap() will wait until 235 * i_mmap_rwsem is released. 236 * 237 * Option (2.1) is the safest, which guarantees pte stability from pmd 238 * sharing pov, until the vma lock released. Option (2.2) doesn't protect 239 * a concurrent pmd unshare, but it makes sure the pgtable page is safe to 240 * access. 241 */ 242pte_t *huge_pte_offset(struct mm_struct *mm, 243 unsigned long addr, unsigned long sz); 244unsigned long hugetlb_mask_last_page(struct hstate *h); 245int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, 246 unsigned long addr, pte_t *ptep); 247void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, 248 unsigned long *start, unsigned long *end); 249 250void hugetlb_vma_lock_read(struct vm_area_struct *vma); 251void hugetlb_vma_unlock_read(struct vm_area_struct *vma); 252void hugetlb_vma_lock_write(struct vm_area_struct *vma); 253void hugetlb_vma_unlock_write(struct vm_area_struct *vma); 254int hugetlb_vma_trylock_write(struct vm_area_struct *vma); 255void hugetlb_vma_assert_locked(struct vm_area_struct *vma); 256void hugetlb_vma_lock_release(struct kref *kref); 257 258int pmd_huge(pmd_t pmd); 259int pud_huge(pud_t pud); 260long hugetlb_change_protection(struct vm_area_struct *vma, 261 unsigned long address, unsigned long end, pgprot_t newprot, 262 unsigned long cp_flags); 263 264bool is_hugetlb_entry_migration(pte_t pte); 265void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); 266 267#else /* !CONFIG_HUGETLB_PAGE */ 268 269static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma) 270{ 
271} 272 273static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma) 274{ 275} 276 277static inline unsigned long hugetlb_total_pages(void) 278{ 279 return 0; 280} 281 282static inline struct address_space *hugetlb_page_mapping_lock_write( 283 struct page *hpage) 284{ 285 return NULL; 286} 287 288static inline int huge_pmd_unshare(struct mm_struct *mm, 289 struct vm_area_struct *vma, 290 unsigned long addr, pte_t *ptep) 291{ 292 return 0; 293} 294 295static inline void adjust_range_if_pmd_sharing_possible( 296 struct vm_area_struct *vma, 297 unsigned long *start, unsigned long *end) 298{ 299} 300 301static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, 302 unsigned long address, unsigned int flags) 303{ 304 BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/ 305} 306 307static inline long follow_hugetlb_page(struct mm_struct *mm, 308 struct vm_area_struct *vma, struct page **pages, 309 struct vm_area_struct **vmas, unsigned long *position, 310 unsigned long *nr_pages, long i, unsigned int flags, 311 int *nonblocking) 312{ 313 BUG(); 314 return 0; 315} 316 317static inline int copy_hugetlb_page_range(struct mm_struct *dst, 318 struct mm_struct *src, 319 struct vm_area_struct *dst_vma, 320 struct vm_area_struct *src_vma) 321{ 322 BUG(); 323 return 0; 324} 325 326static inline int move_hugetlb_page_tables(struct vm_area_struct *vma, 327 struct vm_area_struct *new_vma, 328 unsigned long old_addr, 329 unsigned long new_addr, 330 unsigned long len) 331{ 332 BUG(); 333 return 0; 334} 335 336static inline void hugetlb_report_meminfo(struct seq_file *m) 337{ 338} 339 340static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid) 341{ 342 return 0; 343} 344 345static inline void hugetlb_show_meminfo_node(int nid) 346{ 347} 348 349static inline int prepare_hugepage_range(struct file *file, 350 unsigned long addr, unsigned long len) 351{ 352 return -EINVAL; 353} 354 355static inline void 
hugetlb_vma_lock_read(struct vm_area_struct *vma) 356{ 357} 358 359static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma) 360{ 361} 362 363static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma) 364{ 365} 366 367static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma) 368{ 369} 370 371static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma) 372{ 373 return 1; 374} 375 376static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma) 377{ 378} 379 380static inline int pmd_huge(pmd_t pmd) 381{ 382 return 0; 383} 384 385static inline int pud_huge(pud_t pud) 386{ 387 return 0; 388} 389 390static inline int is_hugepage_only_range(struct mm_struct *mm, 391 unsigned long addr, unsigned long len) 392{ 393 return 0; 394} 395 396static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, 397 unsigned long addr, unsigned long end, 398 unsigned long floor, unsigned long ceiling) 399{ 400 BUG(); 401} 402 403#ifdef CONFIG_USERFAULTFD 404static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte, 405 struct vm_area_struct *dst_vma, 406 unsigned long dst_addr, 407 unsigned long src_addr, 408 uffd_flags_t flags, 409 struct folio **foliop) 410{ 411 BUG(); 412 return 0; 413} 414#endif /* CONFIG_USERFAULTFD */ 415 416static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, 417 unsigned long sz) 418{ 419 return NULL; 420} 421 422static inline bool isolate_hugetlb(struct folio *folio, struct list_head *list) 423{ 424 return false; 425} 426 427static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison) 428{ 429 return 0; 430} 431 432static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags, 433 bool *migratable_cleared) 434{ 435 return 0; 436} 437 438static inline void folio_putback_active_hugetlb(struct folio *folio) 439{ 440} 441 442static inline void move_hugetlb_state(struct folio *old_folio, 443 struct folio *new_folio, int reason) 444{ 
445} 446 447static inline long hugetlb_change_protection( 448 struct vm_area_struct *vma, unsigned long address, 449 unsigned long end, pgprot_t newprot, 450 unsigned long cp_flags) 451{ 452 return 0; 453} 454 455static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, 456 struct vm_area_struct *vma, unsigned long start, 457 unsigned long end, struct page *ref_page, 458 zap_flags_t zap_flags) 459{ 460 BUG(); 461} 462 463static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, 464 struct vm_area_struct *vma, unsigned long address, 465 unsigned int flags) 466{ 467 BUG(); 468 return 0; 469} 470 471static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } 472 473#endif /* !CONFIG_HUGETLB_PAGE */ 474/* 475 * hugepages at page global directory. If arch support 476 * hugepages at pgd level, they need to define this. 477 */ 478#ifndef pgd_huge 479#define pgd_huge(x) 0 480#endif 481#ifndef p4d_huge 482#define p4d_huge(x) 0 483#endif 484 485#ifndef pgd_write 486static inline int pgd_write(pgd_t pgd) 487{ 488 BUG(); 489 return 0; 490} 491#endif 492 493#define HUGETLB_ANON_FILE "anon_hugepage" 494 495enum { 496 /* 497 * The file will be used as an shm file so shmfs accounting rules 498 * apply 499 */ 500 HUGETLB_SHMFS_INODE = 1, 501 /* 502 * The file is being created on the internal vfs mount and shmfs 503 * accounting rules do not apply 504 */ 505 HUGETLB_ANONHUGE_INODE = 2, 506}; 507 508#ifdef CONFIG_HUGETLBFS 509struct hugetlbfs_sb_info { 510 long max_inodes; /* inodes allowed */ 511 long free_inodes; /* inodes free */ 512 spinlock_t stat_lock; 513 struct hstate *hstate; 514 struct hugepage_subpool *spool; 515 kuid_t uid; 516 kgid_t gid; 517 umode_t mode; 518}; 519 520static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) 521{ 522 return sb->s_fs_info; 523} 524 525struct hugetlbfs_inode_info { 526 struct shared_policy policy; 527 struct inode vfs_inode; 528 unsigned int seals; 529}; 530 531static inline struct 
hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) 532{ 533 return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); 534} 535 536extern const struct file_operations hugetlbfs_file_operations; 537extern const struct vm_operations_struct hugetlb_vm_ops; 538struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, 539 int creat_flags, int page_size_log); 540 541static inline bool is_file_hugepages(struct file *file) 542{ 543 if (file->f_op == &hugetlbfs_file_operations) 544 return true; 545 546 return is_file_shm_hugepages(file); 547} 548 549static inline struct hstate *hstate_inode(struct inode *i) 550{ 551 return HUGETLBFS_SB(i->i_sb)->hstate; 552} 553#else /* !CONFIG_HUGETLBFS */ 554 555#define is_file_hugepages(file) false 556static inline struct file * 557hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, 558 int creat_flags, int page_size_log) 559{ 560 return ERR_PTR(-ENOSYS); 561} 562 563static inline struct hstate *hstate_inode(struct inode *i) 564{ 565 return NULL; 566} 567#endif /* !CONFIG_HUGETLBFS */ 568 569#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA 570unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 571 unsigned long len, unsigned long pgoff, 572 unsigned long flags); 573#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ 574 575unsigned long 576generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 577 unsigned long len, unsigned long pgoff, 578 unsigned long flags); 579 580/* 581 * huegtlb page specific state flags. These flags are located in page.private 582 * of the hugetlb head page. Functions created via the below macros should be 583 * used to manipulate these flags. 584 * 585 * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at 586 * allocation time. Cleared when page is fully instantiated. Free 587 * routine checks flag to restore a reservation on error paths. 
588 * Synchronization: Examined or modified by code that knows it has 589 * the only reference to page. i.e. After allocation but before use 590 * or when the page is being freed. 591 * HPG_migratable - Set after a newly allocated page is added to the page 592 * cache and/or page tables. Indicates the page is a candidate for 593 * migration. 594 * Synchronization: Initially set after new page allocation with no 595 * locking. When examined and modified during migration processing 596 * (isolate, migrate, putback) the hugetlb_lock is held. 597 * HPG_temporary - Set on a page that is temporarily allocated from the buddy 598 * allocator. Typically used for migration target pages when no pages 599 * are available in the pool. The hugetlb free page path will 600 * immediately free pages with this flag set to the buddy allocator. 601 * Synchronization: Can be set after huge page allocation from buddy when 602 * code knows it has only reference. All other examinations and 603 * modifications require hugetlb_lock. 604 * HPG_freed - Set when page is on the free lists. 605 * Synchronization: hugetlb_lock held for examination and modification. 606 * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed. 607 * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page 608 * that is not tracked by raw_hwp_page list. 609 */ 610enum hugetlb_page_flags { 611 HPG_restore_reserve = 0, 612 HPG_migratable, 613 HPG_temporary, 614 HPG_freed, 615 HPG_vmemmap_optimized, 616 HPG_raw_hwp_unreliable, 617 __NR_HPAGEFLAGS, 618}; 619 620/* 621 * Macros to create test, set and clear function definitions for 622 * hugetlb specific page flags. 
623 */ 624#ifdef CONFIG_HUGETLB_PAGE 625#define TESTHPAGEFLAG(uname, flname) \ 626static __always_inline \ 627bool folio_test_hugetlb_##flname(struct folio *folio) \ 628 { void *private = &folio->private; \ 629 return test_bit(HPG_##flname, private); \ 630 } \ 631static inline int HPage##uname(struct page *page) \ 632 { return test_bit(HPG_##flname, &(page->private)); } 633 634#define SETHPAGEFLAG(uname, flname) \ 635static __always_inline \ 636void folio_set_hugetlb_##flname(struct folio *folio) \ 637 { void *private = &folio->private; \ 638 set_bit(HPG_##flname, private); \ 639 } \ 640static inline void SetHPage##uname(struct page *page) \ 641 { set_bit(HPG_##flname, &(page->private)); } 642 643#define CLEARHPAGEFLAG(uname, flname) \ 644static __always_inline \ 645void folio_clear_hugetlb_##flname(struct folio *folio) \ 646 { void *private = &folio->private; \ 647 clear_bit(HPG_##flname, private); \ 648 } \ 649static inline void ClearHPage##uname(struct page *page) \ 650 { clear_bit(HPG_##flname, &(page->private)); } 651#else 652#define TESTHPAGEFLAG(uname, flname) \ 653static inline bool \ 654folio_test_hugetlb_##flname(struct folio *folio) \ 655 { return 0; } \ 656static inline int HPage##uname(struct page *page) \ 657 { return 0; } 658 659#define SETHPAGEFLAG(uname, flname) \ 660static inline void \ 661folio_set_hugetlb_##flname(struct folio *folio) \ 662 { } \ 663static inline void SetHPage##uname(struct page *page) \ 664 { } 665 666#define CLEARHPAGEFLAG(uname, flname) \ 667static inline void \ 668folio_clear_hugetlb_##flname(struct folio *folio) \ 669 { } \ 670static inline void ClearHPage##uname(struct page *page) \ 671 { } 672#endif 673 674#define HPAGEFLAG(uname, flname) \ 675 TESTHPAGEFLAG(uname, flname) \ 676 SETHPAGEFLAG(uname, flname) \ 677 CLEARHPAGEFLAG(uname, flname) \ 678 679/* 680 * Create functions associated with hugetlb page flags 681 */ 682HPAGEFLAG(RestoreReserve, restore_reserve) 683HPAGEFLAG(Migratable, migratable) 684HPAGEFLAG(Temporary, 
temporary) 685HPAGEFLAG(Freed, freed) 686HPAGEFLAG(VmemmapOptimized, vmemmap_optimized) 687HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable) 688 689#ifdef CONFIG_HUGETLB_PAGE 690 691#define HSTATE_NAME_LEN 32 692/* Defines one hugetlb page size */ 693struct hstate { 694 struct mutex resize_lock; 695 int next_nid_to_alloc; 696 int next_nid_to_free; 697 unsigned int order; 698 unsigned int demote_order; 699 unsigned long mask; 700 unsigned long max_huge_pages; 701 unsigned long nr_huge_pages; 702 unsigned long free_huge_pages; 703 unsigned long resv_huge_pages; 704 unsigned long surplus_huge_pages; 705 unsigned long nr_overcommit_huge_pages; 706 struct list_head hugepage_activelist; 707 struct list_head hugepage_freelists[MAX_NUMNODES]; 708 unsigned int max_huge_pages_node[MAX_NUMNODES]; 709 unsigned int nr_huge_pages_node[MAX_NUMNODES]; 710 unsigned int free_huge_pages_node[MAX_NUMNODES]; 711 unsigned int surplus_huge_pages_node[MAX_NUMNODES]; 712#ifdef CONFIG_CGROUP_HUGETLB 713 /* cgroup control files */ 714 struct cftype cgroup_files_dfl[8]; 715 struct cftype cgroup_files_legacy[10]; 716#endif 717 char name[HSTATE_NAME_LEN]; 718}; 719 720struct huge_bootmem_page { 721 struct list_head list; 722 struct hstate *hstate; 723}; 724 725int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); 726struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, 727 unsigned long addr, int avoid_reserve); 728struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, 729 nodemask_t *nmask, gfp_t gfp_mask); 730struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, 731 unsigned long address); 732int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, 733 pgoff_t idx); 734void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, 735 unsigned long address, struct folio *folio); 736 737/* arch callback */ 738int __init __alloc_bootmem_huge_page(struct hstate *h, int nid); 
739int __init alloc_bootmem_huge_page(struct hstate *h, int nid); 740bool __init hugetlb_node_alloc_supported(void); 741 742void __init hugetlb_add_hstate(unsigned order); 743bool __init arch_hugetlb_valid_size(unsigned long size); 744struct hstate *size_to_hstate(unsigned long size); 745 746#ifndef HUGE_MAX_HSTATE 747#define HUGE_MAX_HSTATE 1 748#endif 749 750extern struct hstate hstates[HUGE_MAX_HSTATE]; 751extern unsigned int default_hstate_idx; 752 753#define default_hstate (hstates[default_hstate_idx]) 754 755static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) 756{ 757 return folio->_hugetlb_subpool; 758} 759 760/* 761 * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool 762 */ 763static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) 764{ 765 return hugetlb_folio_subpool(page_folio(hpage)); 766} 767 768static inline void hugetlb_set_folio_subpool(struct folio *folio, 769 struct hugepage_subpool *subpool) 770{ 771 folio->_hugetlb_subpool = subpool; 772} 773 774static inline void hugetlb_set_page_subpool(struct page *hpage, 775 struct hugepage_subpool *subpool) 776{ 777 hugetlb_set_folio_subpool(page_folio(hpage), subpool); 778} 779 780static inline struct hstate *hstate_file(struct file *f) 781{ 782 return hstate_inode(file_inode(f)); 783} 784 785static inline struct hstate *hstate_sizelog(int page_size_log) 786{ 787 if (!page_size_log) 788 return &default_hstate; 789 790 if (page_size_log < BITS_PER_LONG) 791 return size_to_hstate(1UL << page_size_log); 792 793 return NULL; 794} 795 796static inline struct hstate *hstate_vma(struct vm_area_struct *vma) 797{ 798 return hstate_file(vma->vm_file); 799} 800 801static inline unsigned long huge_page_size(const struct hstate *h) 802{ 803 return (unsigned long)PAGE_SIZE << h->order; 804} 805 806extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma); 807 808extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma); 809 810static 
inline unsigned long huge_page_mask(struct hstate *h) 811{ 812 return h->mask; 813} 814 815static inline unsigned int huge_page_order(struct hstate *h) 816{ 817 return h->order; 818} 819 820static inline unsigned huge_page_shift(struct hstate *h) 821{ 822 return h->order + PAGE_SHIFT; 823} 824 825static inline bool hstate_is_gigantic(struct hstate *h) 826{ 827 return huge_page_order(h) > MAX_ORDER; 828} 829 830static inline unsigned int pages_per_huge_page(const struct hstate *h) 831{ 832 return 1 << h->order; 833} 834 835static inline unsigned int blocks_per_huge_page(struct hstate *h) 836{ 837 return huge_page_size(h) / 512; 838} 839 840#include <asm/hugetlb.h> 841 842#ifndef is_hugepage_only_range 843static inline int is_hugepage_only_range(struct mm_struct *mm, 844 unsigned long addr, unsigned long len) 845{ 846 return 0; 847} 848#define is_hugepage_only_range is_hugepage_only_range 849#endif 850 851#ifndef arch_clear_hugepage_flags 852static inline void arch_clear_hugepage_flags(struct page *page) { } 853#define arch_clear_hugepage_flags arch_clear_hugepage_flags 854#endif 855 856#ifndef arch_make_huge_pte 857static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, 858 vm_flags_t flags) 859{ 860 return pte_mkhuge(entry); 861} 862#endif 863 864static inline struct hstate *folio_hstate(struct folio *folio) 865{ 866 VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); 867 return size_to_hstate(folio_size(folio)); 868} 869 870static inline struct hstate *page_hstate(struct page *page) 871{ 872 return folio_hstate(page_folio(page)); 873} 874 875static inline unsigned hstate_index_to_shift(unsigned index) 876{ 877 return hstates[index].order + PAGE_SHIFT; 878} 879 880static inline int hstate_index(struct hstate *h) 881{ 882 return h - hstates; 883} 884 885extern int dissolve_free_huge_page(struct page *page); 886extern int dissolve_free_huge_pages(unsigned long start_pfn, 887 unsigned long end_pfn); 888 889#ifdef CONFIG_MEMORY_FAILURE 890extern void 
folio_clear_hugetlb_hwpoison(struct folio *folio); 891#else 892static inline void folio_clear_hugetlb_hwpoison(struct folio *folio) 893{ 894} 895#endif 896 897#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION 898#ifndef arch_hugetlb_migration_supported 899static inline bool arch_hugetlb_migration_supported(struct hstate *h) 900{ 901 if ((huge_page_shift(h) == PMD_SHIFT) || 902 (huge_page_shift(h) == PUD_SHIFT) || 903 (huge_page_shift(h) == PGDIR_SHIFT)) 904 return true; 905 else 906 return false; 907} 908#endif 909#else 910static inline bool arch_hugetlb_migration_supported(struct hstate *h) 911{ 912 return false; 913} 914#endif 915 916static inline bool hugepage_migration_supported(struct hstate *h) 917{ 918 return arch_hugetlb_migration_supported(h); 919} 920 921/* 922 * Movability check is different as compared to migration check. 923 * It determines whether or not a huge page should be placed on 924 * movable zone or not. Movability of any huge page should be 925 * required only if huge page size is supported for migration. 926 * There won't be any reason for the huge page to be movable if 927 * it is not migratable to start with. Also the size of the huge 928 * page should be large enough to be placed under a movable zone 929 * and still feasible enough to be migratable. Just the presence 930 * in movable zone does not make the migration feasible. 931 * 932 * So even though large huge page sizes like the gigantic ones 933 * are migratable they should not be movable because its not 934 * feasible to migrate them from movable zone. 935 */ 936static inline bool hugepage_movable_supported(struct hstate *h) 937{ 938 if (!hugepage_migration_supported(h)) 939 return false; 940 941 if (hstate_is_gigantic(h)) 942 return false; 943 return true; 944} 945 946/* Movability of hugepages depends on migration support. 
*/ 947static inline gfp_t htlb_alloc_mask(struct hstate *h) 948{ 949 if (hugepage_movable_supported(h)) 950 return GFP_HIGHUSER_MOVABLE; 951 else 952 return GFP_HIGHUSER; 953} 954 955static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) 956{ 957 gfp_t modified_mask = htlb_alloc_mask(h); 958 959 /* Some callers might want to enforce node */ 960 modified_mask |= (gfp_mask & __GFP_THISNODE); 961 962 modified_mask |= (gfp_mask & __GFP_NOWARN); 963 964 return modified_mask; 965} 966 967static inline spinlock_t *huge_pte_lockptr(struct hstate *h, 968 struct mm_struct *mm, pte_t *pte) 969{ 970 if (huge_page_size(h) == PMD_SIZE) 971 return pmd_lockptr(mm, (pmd_t *) pte); 972 VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); 973 return &mm->page_table_lock; 974} 975 976#ifndef hugepages_supported 977/* 978 * Some platform decide whether they support huge pages at boot 979 * time. Some of them, such as powerpc, set HPAGE_SHIFT to 0 980 * when there is no such support 981 */ 982#define hugepages_supported() (HPAGE_SHIFT != 0) 983#endif 984 985void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm); 986 987static inline void hugetlb_count_init(struct mm_struct *mm) 988{ 989 atomic_long_set(&mm->hugetlb_usage, 0); 990} 991 992static inline void hugetlb_count_add(long l, struct mm_struct *mm) 993{ 994 atomic_long_add(l, &mm->hugetlb_usage); 995} 996 997static inline void hugetlb_count_sub(long l, struct mm_struct *mm) 998{ 999 atomic_long_sub(l, &mm->hugetlb_usage); 1000} 1001 1002#ifndef huge_ptep_modify_prot_start 1003#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start 1004static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, 1005 unsigned long addr, pte_t *ptep) 1006{ 1007 return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); 1008} 1009#endif 1010 1011#ifndef huge_ptep_modify_prot_commit 1012#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit 1013static inline void 
huge_ptep_modify_prot_commit(struct vm_area_struct *vma, 1014 unsigned long addr, pte_t *ptep, 1015 pte_t old_pte, pte_t pte) 1016{ 1017 set_huge_pte_at(vma->vm_mm, addr, ptep, pte); 1018} 1019#endif 1020 1021#ifdef CONFIG_NUMA 1022void hugetlb_register_node(struct node *node); 1023void hugetlb_unregister_node(struct node *node); 1024#endif 1025 1026#else /* CONFIG_HUGETLB_PAGE */ 1027struct hstate {}; 1028 1029static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) 1030{ 1031 return NULL; 1032} 1033 1034static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) 1035{ 1036 return NULL; 1037} 1038 1039static inline int isolate_or_dissolve_huge_page(struct page *page, 1040 struct list_head *list) 1041{ 1042 return -ENOMEM; 1043} 1044 1045static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, 1046 unsigned long addr, 1047 int avoid_reserve) 1048{ 1049 return NULL; 1050} 1051 1052static inline struct folio * 1053alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, 1054 nodemask_t *nmask, gfp_t gfp_mask) 1055{ 1056 return NULL; 1057} 1058 1059static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h, 1060 struct vm_area_struct *vma, 1061 unsigned long address) 1062{ 1063 return NULL; 1064} 1065 1066static inline int __alloc_bootmem_huge_page(struct hstate *h) 1067{ 1068 return 0; 1069} 1070 1071static inline struct hstate *hstate_file(struct file *f) 1072{ 1073 return NULL; 1074} 1075 1076static inline struct hstate *hstate_sizelog(int page_size_log) 1077{ 1078 return NULL; 1079} 1080 1081static inline struct hstate *hstate_vma(struct vm_area_struct *vma) 1082{ 1083 return NULL; 1084} 1085 1086static inline struct hstate *folio_hstate(struct folio *folio) 1087{ 1088 return NULL; 1089} 1090 1091static inline struct hstate *page_hstate(struct page *page) 1092{ 1093 return NULL; 1094} 1095 1096static inline struct hstate *size_to_hstate(unsigned long size) 1097{ 1098 return NULL; 
1099} 1100 1101static inline unsigned long huge_page_size(struct hstate *h) 1102{ 1103 return PAGE_SIZE; 1104} 1105 1106static inline unsigned long huge_page_mask(struct hstate *h) 1107{ 1108 return PAGE_MASK; 1109} 1110 1111static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma) 1112{ 1113 return PAGE_SIZE; 1114} 1115 1116static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) 1117{ 1118 return PAGE_SIZE; 1119} 1120 1121static inline unsigned int huge_page_order(struct hstate *h) 1122{ 1123 return 0; 1124} 1125 1126static inline unsigned int huge_page_shift(struct hstate *h) 1127{ 1128 return PAGE_SHIFT; 1129} 1130 1131static inline bool hstate_is_gigantic(struct hstate *h) 1132{ 1133 return false; 1134} 1135 1136static inline unsigned int pages_per_huge_page(struct hstate *h) 1137{ 1138 return 1; 1139} 1140 1141static inline unsigned hstate_index_to_shift(unsigned index) 1142{ 1143 return 0; 1144} 1145 1146static inline int hstate_index(struct hstate *h) 1147{ 1148 return 0; 1149} 1150 1151static inline int dissolve_free_huge_page(struct page *page) 1152{ 1153 return 0; 1154} 1155 1156static inline int dissolve_free_huge_pages(unsigned long start_pfn, 1157 unsigned long end_pfn) 1158{ 1159 return 0; 1160} 1161 1162static inline bool hugepage_migration_supported(struct hstate *h) 1163{ 1164 return false; 1165} 1166 1167static inline bool hugepage_movable_supported(struct hstate *h) 1168{ 1169 return false; 1170} 1171 1172static inline gfp_t htlb_alloc_mask(struct hstate *h) 1173{ 1174 return 0; 1175} 1176 1177static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) 1178{ 1179 return 0; 1180} 1181 1182static inline spinlock_t *huge_pte_lockptr(struct hstate *h, 1183 struct mm_struct *mm, pte_t *pte) 1184{ 1185 return &mm->page_table_lock; 1186} 1187 1188static inline void hugetlb_count_init(struct mm_struct *mm) 1189{ 1190} 1191 1192static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct 
*m) 1193{ 1194} 1195 1196static inline void hugetlb_count_sub(long l, struct mm_struct *mm) 1197{ 1198} 1199 1200static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 1201 unsigned long addr, pte_t *ptep) 1202{ 1203 return *ptep; 1204} 1205 1206static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 1207 pte_t *ptep, pte_t pte) 1208{ 1209} 1210 1211static inline void hugetlb_register_node(struct node *node) 1212{ 1213} 1214 1215static inline void hugetlb_unregister_node(struct node *node) 1216{ 1217} 1218#endif /* CONFIG_HUGETLB_PAGE */ 1219 1220static inline spinlock_t *huge_pte_lock(struct hstate *h, 1221 struct mm_struct *mm, pte_t *pte) 1222{ 1223 spinlock_t *ptl; 1224 1225 ptl = huge_pte_lockptr(h, mm, pte); 1226 spin_lock(ptl); 1227 return ptl; 1228} 1229 1230#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA) 1231extern void __init hugetlb_cma_reserve(int order); 1232#else 1233static inline __init void hugetlb_cma_reserve(int order) 1234{ 1235} 1236#endif 1237 1238#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE 1239static inline bool hugetlb_pmd_shared(pte_t *pte) 1240{ 1241 return page_count(virt_to_page(pte)) > 1; 1242} 1243#else 1244static inline bool hugetlb_pmd_shared(pte_t *pte) 1245{ 1246 return false; 1247} 1248#endif 1249 1250bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); 1251 1252#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE 1253/* 1254 * ARCHes with special requirements for evicting HUGETLB backing TLB entries can 1255 * implement this. 1256 */ 1257#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) 1258#endif 1259 1260static inline bool __vma_shareable_lock(struct vm_area_struct *vma) 1261{ 1262 return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; 1263} 1264 1265/* 1266 * Safe version of huge_pte_offset() to check the locks. See comments 1267 * above huge_pte_offset(). 
1268 */ 1269static inline pte_t * 1270hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz) 1271{ 1272#if defined(CONFIG_HUGETLB_PAGE) && \ 1273 defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP) 1274 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; 1275 1276 /* 1277 * If pmd sharing possible, locking needed to safely walk the 1278 * hugetlb pgtables. More information can be found at the comment 1279 * above huge_pte_offset() in the same file. 1280 * 1281 * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP. 1282 */ 1283 if (__vma_shareable_lock(vma)) 1284 WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) && 1285 !lockdep_is_held( 1286 &vma->vm_file->f_mapping->i_mmap_rwsem)); 1287#endif 1288 return huge_pte_offset(vma->vm_mm, addr, sz); 1289} 1290 1291#endif /* _LINUX_HUGETLB_H */