Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
include/linux/hugetlb.h at v6.5 (1274 lines, 35 kB)
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_HUGETLB_H
#define _LINUX_HUGETLB_H

#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/fs.h>
#include <linux/hugetlb_inline.h>
#include <linux/cgroup.h>
#include <linux/page_ref.h>
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/pgtable.h>
#include <linux/gfp.h>
#include <linux/userfaultfd_k.h>

struct ctl_table;
struct user_struct;
struct mmu_gather;
struct node;

#ifndef CONFIG_ARCH_HAS_HUGEPD
typedef struct { unsigned long pd; } hugepd_t;
#define is_hugepd(hugepd) (0)
#define __hugepd(x) ((hugepd_t) { (x) })
#endif

#ifdef CONFIG_HUGETLB_PAGE

#include <linux/mempolicy.h>
#include <linux/shm.h>
#include <asm/tlbflush.h>

/*
 * For a HugeTLB page, there is more metadata to save in the struct page than
 * the head struct page can hold, so we have to reuse other tail struct pages
 * to store it.
 */
#define __NR_USED_SUBPAGE 3

struct hugepage_subpool {
	spinlock_t lock;
	long count;
	long max_hpages;	/* Maximum huge pages or -1 if no maximum. */
	long used_hpages;	/* Used count against maximum, includes */
				/* both allocated and reserved pages. */
	struct hstate *hstate;
	long min_hpages;	/* Minimum huge pages or -1 if no minimum. */
	long rsv_hpages;	/* Pages reserved against global pool to */
				/* satisfy minimum size. */
};

struct resv_map {
	struct kref refs;
	spinlock_t lock;
	struct list_head regions;
	long adds_in_progress;
	struct list_head region_cache;
	long region_cache_count;
#ifdef CONFIG_CGROUP_HUGETLB
	/*
	 * On private mappings, the counter to uncharge reservations is stored
	 * here. If these fields are 0, then either the mapping is shared, or
	 * cgroup accounting is disabled for this resv_map.
	 */
	struct page_counter *reservation_counter;
	unsigned long pages_per_hpage;
	struct cgroup_subsys_state *css;
#endif
};

/*
 * Region tracking -- allows tracking of reservations and instantiated pages
 * across the pages in a mapping.
 *
 * The region data structures are embedded into a resv_map and protected
 * by a resv_map's lock.  The set of regions within the resv_map represent
 * reservations for huge pages, or huge pages that have already been
 * instantiated within the map.  The from and to elements are huge page
 * indices into the associated mapping.  from indicates the starting index
 * of the region.  to represents the first index past the end of the region.
 *
 * For example, a file region structure with from == 0 and to == 4 represents
 * four huge pages in a mapping.  It is important to note that the to element
 * represents the first element past the end of the region.  This is used in
 * arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
 *
 * Interval notation of the form [from, to) will be used to indicate that
 * the endpoint from is inclusive and to is exclusive.
 */
struct file_region {
	struct list_head link;
	long from;
	long to;
#ifdef CONFIG_CGROUP_HUGETLB
	/*
	 * On shared mappings, each reserved region appears as a struct
	 * file_region in resv_map.  These fields hold the info needed to
	 * uncharge each reservation.
	 */
	struct page_counter *reservation_counter;
	struct cgroup_subsys_state *css;
#endif
};
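
/*
 * Worked example of the [from, to) convention above: a resv_map whose
 * region list holds [0, 4) and [6, 8) covers (4 - 0) + (8 - 6) = 6 huge
 * pages, with huge page indices 4 and 5 not reserved.
 */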

struct hugetlb_vma_lock {
	struct kref refs;
	struct rw_semaphore rw_sema;
	struct vm_area_struct *vma;
};

extern struct resv_map *resv_map_alloc(void);
void resv_map_release(struct kref *ref);

extern spinlock_t hugetlb_lock;
extern int hugetlb_max_hstate __read_mostly;
#define for_each_hstate(h) \
	for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++)

struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
						long min_hpages);
void hugepage_put_subpool(struct hugepage_subpool *spool);

void hugetlb_dup_vma_private(struct vm_area_struct *vma);
void clear_vma_resv_huge_pages(struct vm_area_struct *vma);
int move_hugetlb_page_tables(struct vm_area_struct *vma,
			     struct vm_area_struct *new_vma,
			     unsigned long old_addr, unsigned long new_addr,
			     unsigned long len);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
			    struct vm_area_struct *, struct vm_area_struct *);
struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
				unsigned long address, unsigned int flags);
long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
			 struct page **, unsigned long *, unsigned long *,
			 long, unsigned int, int *);
void unmap_hugepage_range(struct vm_area_struct *,
			  unsigned long, unsigned long, struct page *,
			  zap_flags_t);
void __unmap_hugepage_range_final(struct mmu_gather *tlb,
			  struct vm_area_struct *vma,
			  unsigned long start, unsigned long end,
			  struct page *ref_page, zap_flags_t zap_flags);
void hugetlb_report_meminfo(struct seq_file *);
int hugetlb_report_node_meminfo(char *buf, int len, int nid);
void hugetlb_show_meminfo_node(int nid);
unsigned long hugetlb_total_pages(void);
vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, unsigned int flags);
#ifdef CONFIG_USERFAULTFD
int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
			     struct vm_area_struct *dst_vma,
			     unsigned long dst_addr,
			     unsigned long src_addr,
			     uffd_flags_t flags,
			     struct folio **foliop);
#endif /* CONFIG_USERFAULTFD */
bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
						struct vm_area_struct *vma,
						vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
						long freed);
bool isolate_hugetlb(struct folio *folio, struct list_head *list);
int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
				bool *migratable_cleared);
void folio_putback_active_hugetlb(struct folio *folio);
void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
void free_huge_page(struct page *page);
void hugetlb_fix_reserve_counts(struct inode *inode);
extern struct mutex *hugetlb_fault_mutex_table;
u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);
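
/*
 * Usage sketch (illustrative, not part of this header): faults on the same
 * (mapping, index) pair serialize on one of these hashed mutexes.  The
 * variable names below are hypothetical.
 */
#if 0
	u32 hash = hugetlb_fault_mutex_hash(mapping, idx);

	mutex_lock(&hugetlb_fault_mutex_table[hash]);
	/* ... allocate or instantiate the page for this index ... */
	mutex_unlock(&hugetlb_fault_mutex_table[hash]);
#endif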

pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, pud_t *pud);

struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);

extern int sysctl_hugetlb_shm_group;
extern struct list_head huge_boot_pages;

/* arch callbacks */

#ifndef CONFIG_HIGHPTE
/*
 * pte_offset_huge() and pte_alloc_huge() are helpers for those architectures
 * which may go down to the lowest PTE level in their huge_pte_offset() and
 * huge_pte_alloc(): to avoid reliance on pte_offset_map() without pte_unmap().
 */
static inline pte_t *pte_offset_huge(pmd_t *pmd, unsigned long address)
{
	return pte_offset_kernel(pmd, address);
}
static inline pte_t *pte_alloc_huge(struct mm_struct *mm, pmd_t *pmd,
				    unsigned long address)
{
	return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address);
}
#endif

pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long addr, unsigned long sz);
/*
 * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE.
 * Returns the pte_t* if found, or NULL if the address is not mapped.
 *
 * IMPORTANT: this should normally not be called directly; it is only the
 * common interface underlying the arch-specific walkers.  Please use
 * hugetlb_walk() instead, because that will attempt to verify the locking
 * for you.
 *
 * Since this function walks all the pgtable pages (including not only
 * high-level pgtable pages, but also PUD entries that can be unshared
 * concurrently for VM_SHARED), the caller is responsible for its thread
 * safety.  One can follow this rule:
 *
 * (1) For private mappings: pmd unsharing is not possible, so holding the
 *     mmap_lock for either read or write is sufficient. Most callers
 *     already hold the mmap_lock, so normally, no special action is
 *     required.
 *
 * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged
 *     pgtable page can go away from under us!  It can be done by a pmd
 *     unshare with a follow up munmap() on the other process), then we
 *     need either:
 *
 *     (2.1) hugetlb vma lock read or write held, to make sure pmd unshare
 *           won't happen upon the range (it also makes sure the pte_t we
 *           read is the right and stable one), or,
 *
 *     (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make
 *           sure even if unshare happened the racy unmap() will wait until
 *           i_mmap_rwsem is released.
 *
 * Option (2.1) is the safest, which guarantees pte stability from the pmd
 * sharing pov until the vma lock is released.  Option (2.2) doesn't protect
 * against a concurrent pmd unshare, but it makes sure the pgtable page is
 * safe to access.
 */
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz);
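
/*
 * Usage sketch for rule (2.1) above (illustrative only; hugetlb_walk() is
 * defined at the bottom of this header): hold the hugetlb vma lock across
 * the walk and the pte read, so a concurrent pmd unshare cannot free the
 * pgtable page under us.
 */
#if 0
	struct hstate *h = hstate_vma(vma);
	pte_t *ptep;
	pte_t entry;

	hugetlb_vma_lock_read(vma);
	ptep = hugetlb_walk(vma, addr & huge_page_mask(h), huge_page_size(h));
	if (ptep)
		entry = huge_ptep_get(ptep);	/* stable while the lock is held */
	hugetlb_vma_unlock_read(vma);
#endif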

unsigned long hugetlb_mask_last_page(struct hstate *h);
int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
				unsigned long addr, pte_t *ptep);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
				unsigned long *start, unsigned long *end);

void hugetlb_vma_lock_read(struct vm_area_struct *vma);
void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
void hugetlb_vma_lock_write(struct vm_area_struct *vma);
void hugetlb_vma_unlock_write(struct vm_area_struct *vma);
int hugetlb_vma_trylock_write(struct vm_area_struct *vma);
void hugetlb_vma_assert_locked(struct vm_area_struct *vma);
void hugetlb_vma_lock_release(struct kref *kref);

int pmd_huge(pmd_t pmd);
int pud_huge(pud_t pud);
long hugetlb_change_protection(struct vm_area_struct *vma,
		unsigned long address, unsigned long end, pgprot_t newprot,
		unsigned long cp_flags);

bool is_hugetlb_entry_migration(pte_t pte);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);

#else /* !CONFIG_HUGETLB_PAGE */

static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma)
{
}

static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma)
{
}

static inline unsigned long hugetlb_total_pages(void)
{
	return 0;
}

static inline struct address_space *hugetlb_page_mapping_lock_write(
							struct page *hpage)
{
	return NULL;
}

static inline int huge_pmd_unshare(struct mm_struct *mm,
					struct vm_area_struct *vma,
					unsigned long addr, pte_t *ptep)
{
	return 0;
}

static inline void adjust_range_if_pmd_sharing_possible(
				struct vm_area_struct *vma,
				unsigned long *start, unsigned long *end)
{
}

static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
				unsigned long address, unsigned int flags)
{
	BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE */
}

static inline long follow_hugetlb_page(struct mm_struct *mm,
			struct vm_area_struct *vma, struct page **pages,
			unsigned long *position, unsigned long *nr_pages,
			long i, unsigned int flags, int *nonblocking)
{
	BUG();
	return 0;
}

static inline int copy_hugetlb_page_range(struct mm_struct *dst,
					  struct mm_struct *src,
					  struct vm_area_struct *dst_vma,
					  struct vm_area_struct *src_vma)
{
	BUG();
	return 0;
}

static inline int move_hugetlb_page_tables(struct vm_area_struct *vma,
					   struct vm_area_struct *new_vma,
					   unsigned long old_addr,
					   unsigned long new_addr,
					   unsigned long len)
{
	BUG();
	return 0;
}

static inline void hugetlb_report_meminfo(struct seq_file *m)
{
}

static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid)
{
	return 0;
}

static inline void hugetlb_show_meminfo_node(int nid)
{
}

static inline int prepare_hugepage_range(struct file *file,
				unsigned long addr, unsigned long len)
{
	return -EINVAL;
}

static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
{
}

static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
	return 1;
}

static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
}

static inline int pmd_huge(pmd_t pmd)
{
	return 0;
}

static inline int pud_huge(pud_t pud)
{
	return 0;
}

static inline int is_hugepage_only_range(struct mm_struct *mm,
					unsigned long addr, unsigned long len)
{
	return 0;
}

static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb,
				unsigned long addr, unsigned long end,
				unsigned long floor, unsigned long ceiling)
{
	BUG();
}

#ifdef CONFIG_USERFAULTFD
static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
					   struct vm_area_struct *dst_vma,
					   unsigned long dst_addr,
					   unsigned long src_addr,
					   uffd_flags_t flags,
					   struct folio **foliop)
{
	BUG();
	return 0;
}
#endif /* CONFIG_USERFAULTFD */

static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
					unsigned long sz)
{
	return NULL;
}

static inline bool isolate_hugetlb(struct folio *folio, struct list_head *list)
{
	return false;
}

static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison)
{
	return 0;
}

static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
					bool *migratable_cleared)
{
	return 0;
}

static inline void folio_putback_active_hugetlb(struct folio *folio)
{
}

static inline void move_hugetlb_state(struct folio *old_folio,
					struct folio *new_folio, int reason)
{
}

static inline long hugetlb_change_protection(
			struct vm_area_struct *vma, unsigned long address,
			unsigned long end, pgprot_t newprot,
			unsigned long cp_flags)
{
	return 0;
}

static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
			struct vm_area_struct *vma, unsigned long start,
			unsigned long end, struct page *ref_page,
			zap_flags_t zap_flags)
{
	BUG();
}

static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int flags)
{
	BUG();
	return 0;
}

static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }

#endif /* !CONFIG_HUGETLB_PAGE */
/*
 * hugepages at the page global directory.  If an arch supports hugepages
 * at the pgd level, it needs to define this.
 */
#ifndef pgd_huge
#define pgd_huge(x)	0
#endif
#ifndef p4d_huge
#define p4d_huge(x)	0
#endif

#ifndef pgd_write
static inline int pgd_write(pgd_t pgd)
{
	BUG();
	return 0;
}
#endif

#define HUGETLB_ANON_FILE "anon_hugepage"

enum {
	/*
	 * The file will be used as a shm file so shmfs accounting rules
	 * apply
	 */
	HUGETLB_SHMFS_INODE     = 1,
	/*
	 * The file is being created on the internal vfs mount and shmfs
	 * accounting rules do not apply
	 */
	HUGETLB_ANONHUGE_INODE  = 2,
};

#ifdef CONFIG_HUGETLBFS
struct hugetlbfs_sb_info {
	long	max_inodes;   /* inodes allowed */
	long	free_inodes;  /* inodes free */
	spinlock_t	stat_lock;
	struct hstate *hstate;
	struct hugepage_subpool *spool;
	kuid_t	uid;
	kgid_t	gid;
	umode_t mode;
};

static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}

struct hugetlbfs_inode_info {
	struct shared_policy policy;
	struct inode vfs_inode;
	unsigned int seals;
};

static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
{
	return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
}

extern const struct file_operations hugetlbfs_file_operations;
extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
				int creat_flags, int page_size_log);

static inline bool is_file_hugepages(struct file *file)
{
	if (file->f_op == &hugetlbfs_file_operations)
		return true;

	return is_file_shm_hugepages(file);
}
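
/*
 * Usage sketch (illustrative): roughly how anonymous MAP_HUGETLB mappings
 * obtain their backing file.  Passing 0 for page_size_log selects the
 * default huge page size (see hstate_sizelog() below).
 */
#if 0
	struct file *file;

	file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
				  HUGETLB_ANONHUGE_INODE,
				  0 /* default huge page size */);
	if (IS_ERR(file))
		return PTR_ERR(file);
#endif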

static inline struct hstate *hstate_inode(struct inode *i)
{
	return HUGETLBFS_SB(i->i_sb)->hstate;
}
#else /* !CONFIG_HUGETLBFS */

#define is_file_hugepages(file)			false
static inline struct file *
hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
		int creat_flags, int page_size_log)
{
	return ERR_PTR(-ENOSYS);
}

static inline struct hstate *hstate_inode(struct inode *i)
{
	return NULL;
}
#endif /* !CONFIG_HUGETLBFS */

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
					unsigned long len, unsigned long pgoff,
					unsigned long flags);
#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */

unsigned long
generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
				  unsigned long len, unsigned long pgoff,
				  unsigned long flags);

/*
 * hugetlb page specific state flags.  These flags are located in page.private
 * of the hugetlb head page.  Functions created via the below macros should be
 * used to manipulate these flags.
 *
 * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at
 *	allocation time.  Cleared when page is fully instantiated.  Free
 *	routine checks flag to restore a reservation on error paths.
 *	Synchronization:  Examined or modified by code that knows it has
 *	the only reference to page.  i.e. After allocation but before use
 *	or when the page is being freed.
 * HPG_migratable - Set after a newly allocated page is added to the page
 *	cache and/or page tables.  Indicates the page is a candidate for
 *	migration.
 *	Synchronization:  Initially set after new page allocation with no
 *	locking.  When examined and modified during migration processing
 *	(isolate, migrate, putback) the hugetlb_lock is held.
 * HPG_temporary - Set on a page that is temporarily allocated from the buddy
 *	allocator.  Typically used for migration target pages when no pages
 *	are available in the pool.  The hugetlb free page path will
 *	immediately free pages with this flag set to the buddy allocator.
 *	Synchronization: Can be set after huge page allocation from buddy when
 *	code knows it has only reference.  All other examinations and
 *	modifications require hugetlb_lock.
 * HPG_freed - Set when page is on the free lists.
 *	Synchronization: hugetlb_lock held for examination and modification.
 * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed.
 * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page
 *	that is not tracked by raw_hwp_page list.
 */
enum hugetlb_page_flags {
	HPG_restore_reserve = 0,
	HPG_migratable,
	HPG_temporary,
	HPG_freed,
	HPG_vmemmap_optimized,
	HPG_raw_hwp_unreliable,
	__NR_HPAGEFLAGS,
};

/*
 * Macros to create test, set and clear function definitions for
 * hugetlb specific page flags.
 */
#ifdef CONFIG_HUGETLB_PAGE
#define TESTHPAGEFLAG(uname, flname)				\
static __always_inline						\
bool folio_test_hugetlb_##flname(struct folio *folio)		\
	{	void *private = &folio->private;		\
		return test_bit(HPG_##flname, private);	\
	}							\
static inline int HPage##uname(struct page *page)		\
	{ return test_bit(HPG_##flname, &(page->private)); }

#define SETHPAGEFLAG(uname, flname)				\
static __always_inline						\
void folio_set_hugetlb_##flname(struct folio *folio)		\
	{	void *private = &folio->private;		\
		set_bit(HPG_##flname, private);			\
	}							\
static inline void SetHPage##uname(struct page *page)		\
	{ set_bit(HPG_##flname, &(page->private)); }

#define CLEARHPAGEFLAG(uname, flname)				\
static __always_inline						\
void folio_clear_hugetlb_##flname(struct folio *folio)		\
	{	void *private = &folio->private;		\
		clear_bit(HPG_##flname, private);		\
	}							\
static inline void ClearHPage##uname(struct page *page)	\
	{ clear_bit(HPG_##flname, &(page->private)); }
#else
#define TESTHPAGEFLAG(uname, flname)				\
static inline bool						\
folio_test_hugetlb_##flname(struct folio *folio)		\
	{ return 0; }						\
static inline int HPage##uname(struct page *page)		\
	{ return 0; }

#define SETHPAGEFLAG(uname, flname)				\
static inline void						\
folio_set_hugetlb_##flname(struct folio *folio)			\
	{ }							\
static inline void SetHPage##uname(struct page *page)		\
	{ }

#define CLEARHPAGEFLAG(uname, flname)				\
static inline void						\
folio_clear_hugetlb_##flname(struct folio *folio)		\
	{ }							\
static inline void ClearHPage##uname(struct page *page)	\
	{ }
#endif

#define HPAGEFLAG(uname, flname)				\
	TESTHPAGEFLAG(uname, flname)				\
	SETHPAGEFLAG(uname, flname)				\
	CLEARHPAGEFLAG(uname, flname)

/*
 * Create functions associated with hugetlb page flags
 */
HPAGEFLAG(RestoreReserve, restore_reserve)
HPAGEFLAG(Migratable, migratable)
HPAGEFLAG(Temporary, temporary)
HPAGEFLAG(Freed, freed)
HPAGEFLAG(VmemmapOptimized, vmemmap_optimized)
HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
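
/*
 * For example, HPAGEFLAG(Migratable, migratable) above expands into
 * folio_test_hugetlb_migratable(), folio_set_hugetlb_migratable() and
 * folio_clear_hugetlb_migratable(), plus the page-based variants
 * HPageMigratable(), SetHPageMigratable() and ClearHPageMigratable(),
 * all operating on bit HPG_migratable of the head page's page.private.
 */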

#ifdef CONFIG_HUGETLB_PAGE

#define HSTATE_NAME_LEN 32
/* Defines one hugetlb page size */
struct hstate {
	struct mutex resize_lock;
	int next_nid_to_alloc;
	int next_nid_to_free;
	unsigned int order;
	unsigned int demote_order;
	unsigned long mask;
	unsigned long max_huge_pages;
	unsigned long nr_huge_pages;
	unsigned long free_huge_pages;
	unsigned long resv_huge_pages;
	unsigned long surplus_huge_pages;
	unsigned long nr_overcommit_huge_pages;
	struct list_head hugepage_activelist;
	struct list_head hugepage_freelists[MAX_NUMNODES];
	unsigned int max_huge_pages_node[MAX_NUMNODES];
	unsigned int nr_huge_pages_node[MAX_NUMNODES];
	unsigned int free_huge_pages_node[MAX_NUMNODES];
	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
#ifdef CONFIG_CGROUP_HUGETLB
	/* cgroup control files */
	struct cftype cgroup_files_dfl[8];
	struct cftype cgroup_files_legacy[10];
#endif
	char name[HSTATE_NAME_LEN];
};

struct huge_bootmem_page {
	struct list_head list;
	struct hstate *hstate;
};

int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
				unsigned long addr, int avoid_reserve);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
				nodemask_t *nmask, gfp_t gfp_mask);
struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma,
				unsigned long address);
int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
			pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
				unsigned long address, struct folio *folio);

/* arch callback */
int __init __alloc_bootmem_huge_page(struct hstate *h, int nid);
int __init alloc_bootmem_huge_page(struct hstate *h, int nid);
bool __init hugetlb_node_alloc_supported(void);

void __init hugetlb_add_hstate(unsigned order);
bool __init arch_hugetlb_valid_size(unsigned long size);
struct hstate *size_to_hstate(unsigned long size);

#ifndef HUGE_MAX_HSTATE
#define HUGE_MAX_HSTATE 1
#endif

extern struct hstate hstates[HUGE_MAX_HSTATE];
extern unsigned int default_hstate_idx;

#define default_hstate (hstates[default_hstate_idx])

static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
	return folio->_hugetlb_subpool;
}

static inline void hugetlb_set_folio_subpool(struct folio *folio,
					struct hugepage_subpool *subpool)
{
	folio->_hugetlb_subpool = subpool;
}

static inline struct hstate *hstate_file(struct file *f)
{
	return hstate_inode(file_inode(f));
}

static inline struct hstate *hstate_sizelog(int page_size_log)
{
	if (!page_size_log)
		return &default_hstate;

	if (page_size_log < BITS_PER_LONG)
		return size_to_hstate(1UL << page_size_log);

	return NULL;
}

static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
	return hstate_file(vma->vm_file);
}

static inline unsigned long huge_page_size(const struct hstate *h)
{
	return (unsigned long)PAGE_SIZE << h->order;
}

extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma);

extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);

static inline unsigned long huge_page_mask(struct hstate *h)
{
	return h->mask;
}

static inline unsigned int huge_page_order(struct hstate *h)
{
	return h->order;
}

static inline unsigned huge_page_shift(struct hstate *h)
{
	return h->order + PAGE_SHIFT;
}

static inline bool hstate_is_gigantic(struct hstate *h)
{
	return huge_page_order(h) > MAX_ORDER;
}

static inline unsigned int pages_per_huge_page(const struct hstate *h)
{
	return 1 << h->order;
}

static inline unsigned int blocks_per_huge_page(struct hstate *h)
{
	return huge_page_size(h) / 512;
}
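
/*
 * Worked example of the helpers above (illustrative, x86-64 values assumed):
 * a 2 MB huge page with PAGE_SHIFT == 12 has h->order == 9, so:
 *
 *	huge_page_size(h)	== 4096UL << 9	== 2 MB
 *	huge_page_shift(h)	== 9 + 12	== 21
 *	pages_per_huge_page(h)	== 1 << 9	== 512
 *	blocks_per_huge_page(h)	== 2 MB / 512	== 4096 sectors
 *
 * and hstate_sizelog(21) maps back to this hstate, if it is registered.
 */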

#include <asm/hugetlb.h>

#ifndef is_hugepage_only_range
static inline int is_hugepage_only_range(struct mm_struct *mm,
					unsigned long addr, unsigned long len)
{
	return 0;
}
#define is_hugepage_only_range is_hugepage_only_range
#endif

#ifndef arch_clear_hugepage_flags
static inline void arch_clear_hugepage_flags(struct page *page) { }
#define arch_clear_hugepage_flags arch_clear_hugepage_flags
#endif

#ifndef arch_make_huge_pte
static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
				       vm_flags_t flags)
{
	return pte_mkhuge(entry);
}
#endif

static inline struct hstate *folio_hstate(struct folio *folio)
{
	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
	return size_to_hstate(folio_size(folio));
}

static inline struct hstate *page_hstate(struct page *page)
{
	return folio_hstate(page_folio(page));
}

static inline unsigned hstate_index_to_shift(unsigned index)
{
	return hstates[index].order + PAGE_SHIFT;
}

static inline int hstate_index(struct hstate *h)
{
	return h - hstates;
}

extern int dissolve_free_huge_page(struct page *page);
extern int dissolve_free_huge_pages(unsigned long start_pfn,
				    unsigned long end_pfn);

#ifdef CONFIG_MEMORY_FAILURE
extern void folio_clear_hugetlb_hwpoison(struct folio *folio);
#else
static inline void folio_clear_hugetlb_hwpoison(struct folio *folio)
{
}
#endif

#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
#ifndef arch_hugetlb_migration_supported
static inline bool arch_hugetlb_migration_supported(struct hstate *h)
{
	if ((huge_page_shift(h) == PMD_SHIFT) ||
		(huge_page_shift(h) == PUD_SHIFT) ||
			(huge_page_shift(h) == PGDIR_SHIFT))
		return true;
	else
		return false;
}
#endif
#else
static inline bool arch_hugetlb_migration_supported(struct hstate *h)
{
	return false;
}
#endif

static inline bool hugepage_migration_supported(struct hstate *h)
{
	return arch_hugetlb_migration_supported(h);
}

/*
 * Movability checking is different from migration checking: it determines
 * whether a huge page may be placed in a movable zone.  Movability only
 * matters for huge page sizes that support migration in the first place;
 * there is no reason for a huge page to be movable if it is not migratable
 * to start with.  The huge page also needs to be small enough that
 * migrating it out of a movable zone remains feasible; mere presence in a
 * movable zone does not make the migration feasible.
 *
 * So even though large huge page sizes like the gigantic ones are
 * migratable, they should not be movable, because it is not feasible to
 * migrate them out of a movable zone.
 */
static inline bool hugepage_movable_supported(struct hstate *h)
{
	if (!hugepage_migration_supported(h))
		return false;

	if (hstate_is_gigantic(h))
		return false;
	return true;
}

/* Movability of hugepages depends on migration support. */
static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
	if (hugepage_movable_supported(h))
		return GFP_HIGHUSER_MOVABLE;
	else
		return GFP_HIGHUSER;
}

static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
{
	gfp_t modified_mask = htlb_alloc_mask(h);

	/* Some callers might want to enforce node */
	modified_mask |= (gfp_mask & __GFP_THISNODE);

	modified_mask |= (gfp_mask & __GFP_NOWARN);

	return modified_mask;
}
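
/*
 * Usage sketch (illustrative): a migration caller that must stay on one
 * node can pass __GFP_THISNODE | __GFP_NOWARN; only those two bits are
 * folded into the hstate's base mask.  The variable names are hypothetical.
 */
#if 0
	gfp_t gfp = htlb_modify_alloc_mask(h, __GFP_THISNODE | __GFP_NOWARN);
	struct folio *folio = alloc_hugetlb_folio_nodemask(h, nid, NULL, gfp);
#endif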

static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	if (huge_page_size(h) == PMD_SIZE)
		return pmd_lockptr(mm, (pmd_t *) pte);
	VM_BUG_ON(huge_page_size(h) == PAGE_SIZE);
	return &mm->page_table_lock;
}

#ifndef hugepages_supported
/*
 * Some platforms decide whether they support huge pages at boot
 * time.  Some of them, such as powerpc, set HPAGE_SHIFT to 0
 * when there is no such support.
 */
#define hugepages_supported() (HPAGE_SHIFT != 0)
#endif

void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm);

static inline void hugetlb_count_init(struct mm_struct *mm)
{
	atomic_long_set(&mm->hugetlb_usage, 0);
}

static inline void hugetlb_count_add(long l, struct mm_struct *mm)
{
	atomic_long_add(l, &mm->hugetlb_usage);
}

static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
	atomic_long_sub(l, &mm->hugetlb_usage);
}

#ifndef huge_ptep_modify_prot_start
#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
						unsigned long addr, pte_t *ptep)
{
	return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
}
#endif

#ifndef huge_ptep_modify_prot_commit
#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
						unsigned long addr, pte_t *ptep,
						pte_t old_pte, pte_t pte)
{
	set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
#endif
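
/*
 * Usage sketch (illustrative): the start/commit pair brackets a protection
 * change so an arch can handle the transient cleared state.  This assumes
 * huge_pte_modify() from asm-generic/hugetlb.h; variable names are
 * hypothetical.
 */
#if 0
	pte_t old_pte = huge_ptep_modify_prot_start(vma, addr, ptep);
	pte_t new_pte = huge_pte_modify(old_pte, newprot);

	huge_ptep_modify_prot_commit(vma, addr, ptep, old_pte, new_pte);
#endif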

#ifdef CONFIG_NUMA
void hugetlb_register_node(struct node *node);
void hugetlb_unregister_node(struct node *node);
#endif

#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};

static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
	return NULL;
}

static inline int isolate_or_dissolve_huge_page(struct page *page,
						struct list_head *list)
{
	return -ENOMEM;
}

static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
					   unsigned long addr,
					   int avoid_reserve)
{
	return NULL;
}

static inline struct folio *
alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
			nodemask_t *nmask, gfp_t gfp_mask)
{
	return NULL;
}

static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h,
					       struct vm_area_struct *vma,
					       unsigned long address)
{
	return NULL;
}

static inline int __alloc_bootmem_huge_page(struct hstate *h)
{
	return 0;
}

static inline struct hstate *hstate_file(struct file *f)
{
	return NULL;
}

static inline struct hstate *hstate_sizelog(int page_size_log)
{
	return NULL;
}

static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
	return NULL;
}

static inline struct hstate *folio_hstate(struct folio *folio)
{
	return NULL;
}

static inline struct hstate *page_hstate(struct page *page)
{
	return NULL;
}

static inline struct hstate *size_to_hstate(unsigned long size)
{
	return NULL;
}

static inline unsigned long huge_page_size(struct hstate *h)
{
	return PAGE_SIZE;
}

static inline unsigned long huge_page_mask(struct hstate *h)
{
	return PAGE_MASK;
}

static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
{
	return PAGE_SIZE;
}

static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
	return PAGE_SIZE;
}

static inline unsigned int huge_page_order(struct hstate *h)
{
	return 0;
}

static inline unsigned int huge_page_shift(struct hstate *h)
{
	return PAGE_SHIFT;
}

static inline bool hstate_is_gigantic(struct hstate *h)
{
	return false;
}

static inline unsigned int pages_per_huge_page(struct hstate *h)
{
	return 1;
}

static inline unsigned hstate_index_to_shift(unsigned index)
{
	return 0;
}

static inline int hstate_index(struct hstate *h)
{
	return 0;
}

static inline int dissolve_free_huge_page(struct page *page)
{
	return 0;
}

static inline int dissolve_free_huge_pages(unsigned long start_pfn,
					   unsigned long end_pfn)
{
	return 0;
}

static inline bool hugepage_migration_supported(struct hstate *h)
{
	return false;
}

static inline bool hugepage_movable_supported(struct hstate *h)
{
	return false;
}

static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
	return 0;
}

static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
{
	return 0;
}

static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	return &mm->page_table_lock;
}

static inline void hugetlb_count_init(struct mm_struct *mm)
{
}

static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
{
}

static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
}

static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep)
{
#ifdef CONFIG_MMU
	return ptep_get(ptep);
#else
	return *ptep;
#endif
}

static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, pte_t pte)
{
}

static inline void hugetlb_register_node(struct node *node)
{
}

static inline void hugetlb_unregister_node(struct node *node)
{
}
#endif	/* CONFIG_HUGETLB_PAGE */

static inline spinlock_t *huge_pte_lock(struct hstate *h,
					struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl;

	ptl = huge_pte_lockptr(h, mm, pte);
	spin_lock(ptl);
	return ptl;
}
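
/*
 * Usage sketch (illustrative): take the per-pmd or per-mm lock, operate on
 * the entry, then drop it with a plain spin_unlock().  Variable names are
 * hypothetical.
 */
#if 0
	spinlock_t *ptl = huge_pte_lock(h, mm, ptep);
	pte_t entry = huge_ptep_get(ptep);

	/* ... inspect or update the entry ... */
	spin_unlock(ptl);
#endif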

#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
extern void __init hugetlb_cma_reserve(int order);
#else
static inline __init void hugetlb_cma_reserve(int order)
{
}
#endif

#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
	return page_count(virt_to_page(pte)) > 1;
}
#else
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
	return false;
}
#endif

bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);

#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
/*
 * ARCHes with special requirements for evicting HUGETLB backing TLB entries can
 * implement this.
 */
#define flush_hugetlb_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#endif

static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
{
	return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
}

/*
 * Safe version of huge_pte_offset() to check the locks.  See comments
 * above huge_pte_offset().
 */
static inline pte_t *
hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz)
{
#if defined(CONFIG_HUGETLB_PAGE) && \
	defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP)
	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

	/*
	 * If pmd sharing is possible, locking is needed to safely walk the
	 * hugetlb pgtables.  More information can be found at the comment
	 * above huge_pte_offset() in the same file.
	 *
	 * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP.
	 */
	if (__vma_shareable_lock(vma))
		WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) &&
			     !lockdep_is_held(
				 &vma->vm_file->f_mapping->i_mmap_rwsem));
#endif
	return huge_pte_offset(vma->vm_mm, addr, sz);
}

#endif /* _LINUX_HUGETLB_H */