at v6.3 1285 lines 35 kB view raw
1/* SPDX-License-Identifier: GPL-2.0 */ 2#ifndef _LINUX_HUGETLB_H 3#define _LINUX_HUGETLB_H 4 5#include <linux/mm.h> 6#include <linux/mm_types.h> 7#include <linux/mmdebug.h> 8#include <linux/fs.h> 9#include <linux/hugetlb_inline.h> 10#include <linux/cgroup.h> 11#include <linux/page_ref.h> 12#include <linux/list.h> 13#include <linux/kref.h> 14#include <linux/pgtable.h> 15#include <linux/gfp.h> 16#include <linux/userfaultfd_k.h> 17 18struct ctl_table; 19struct user_struct; 20struct mmu_gather; 21struct node; 22 23#ifndef CONFIG_ARCH_HAS_HUGEPD 24typedef struct { unsigned long pd; } hugepd_t; 25#define is_hugepd(hugepd) (0) 26#define __hugepd(x) ((hugepd_t) { (x) }) 27#endif 28 29#ifdef CONFIG_HUGETLB_PAGE 30 31#include <linux/mempolicy.h> 32#include <linux/shm.h> 33#include <asm/tlbflush.h> 34 35/* 36 * For HugeTLB page, there are more metadata to save in the struct page. But 37 * the head struct page cannot meet our needs, so we have to abuse other tail 38 * struct page to store the metadata. 39 */ 40#define __NR_USED_SUBPAGE 3 41 42struct hugepage_subpool { 43 spinlock_t lock; 44 long count; 45 long max_hpages; /* Maximum huge pages or -1 if no maximum. */ 46 long used_hpages; /* Used count against maximum, includes */ 47 /* both allocated and reserved pages. */ 48 struct hstate *hstate; 49 long min_hpages; /* Minimum huge pages or -1 if no minimum. */ 50 long rsv_hpages; /* Pages reserved against global pool to */ 51 /* satisfy minimum size. */ 52}; 53 54struct resv_map { 55 struct kref refs; 56 spinlock_t lock; 57 struct list_head regions; 58 long adds_in_progress; 59 struct list_head region_cache; 60 long region_cache_count; 61#ifdef CONFIG_CGROUP_HUGETLB 62 /* 63 * On private mappings, the counter to uncharge reservations is stored 64 * here. If these fields are 0, then either the mapping is shared, or 65 * cgroup accounting is disabled for this resv_map. 66 */ 67 struct page_counter *reservation_counter; 68 unsigned long pages_per_hpage; 69 struct cgroup_subsys_state *css; 70#endif 71}; 72 73/* 74 * Region tracking -- allows tracking of reservations and instantiated pages 75 * across the pages in a mapping. 76 * 77 * The region data structures are embedded into a resv_map and protected 78 * by a resv_map's lock. The set of regions within the resv_map represent 79 * reservations for huge pages, or huge pages that have already been 80 * instantiated within the map. The from and to elements are huge page 81 * indices into the associated mapping. from indicates the starting index 82 * of the region. to represents the first index past the end of the region. 83 * 84 * For example, a file region structure with from == 0 and to == 4 represents 85 * four huge pages in a mapping. It is important to note that the to element 86 * represents the first element past the end of the region. This is used in 87 * arithmetic as 4(to) - 0(from) = 4 huge pages in the region. 88 * 89 * Interval notation of the form [from, to) will be used to indicate that 90 * the endpoint from is inclusive and to is exclusive. 91 */ 92struct file_region { 93 struct list_head link; 94 long from; 95 long to; 96#ifdef CONFIG_CGROUP_HUGETLB 97 /* 98 * On shared mappings, each reserved region appears as a struct 99 * file_region in resv_map. These fields hold the info needed to 100 * uncharge each reservation. 101 */ 102 struct page_counter *reservation_counter; 103 struct cgroup_subsys_state *css; 104#endif 105}; 106 107struct hugetlb_vma_lock { 108 struct kref refs; 109 struct rw_semaphore rw_sema; 110 struct vm_area_struct *vma; 111}; 112 113extern struct resv_map *resv_map_alloc(void); 114void resv_map_release(struct kref *ref); 115 116extern spinlock_t hugetlb_lock; 117extern int hugetlb_max_hstate __read_mostly; 118#define for_each_hstate(h) \ 119 for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++) 120 121struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, 122 long min_hpages); 123void hugepage_put_subpool(struct hugepage_subpool *spool); 124 125void hugetlb_dup_vma_private(struct vm_area_struct *vma); 126void clear_vma_resv_huge_pages(struct vm_area_struct *vma); 127int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); 128int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *, 129 loff_t *); 130int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *, 131 loff_t *); 132int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *, 133 loff_t *); 134 135int move_hugetlb_page_tables(struct vm_area_struct *vma, 136 struct vm_area_struct *new_vma, 137 unsigned long old_addr, unsigned long new_addr, 138 unsigned long len); 139int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, 140 struct vm_area_struct *, struct vm_area_struct *); 141struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, 142 unsigned long address, unsigned int flags); 143long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, 144 struct page **, struct vm_area_struct **, 145 unsigned long *, unsigned long *, long, unsigned int, 146 int *); 147void unmap_hugepage_range(struct vm_area_struct *, 148 unsigned long, unsigned long, struct page *, 149 zap_flags_t); 150void __unmap_hugepage_range_final(struct mmu_gather *tlb, 151 struct vm_area_struct *vma, 152 unsigned long start, unsigned long end, 153 struct page *ref_page, zap_flags_t zap_flags); 154void hugetlb_report_meminfo(struct seq_file *); 155int hugetlb_report_node_meminfo(char *buf, int len, int nid); 156void hugetlb_show_meminfo_node(int nid); 157unsigned long hugetlb_total_pages(void); 158vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, 159 unsigned long address, unsigned int flags); 160#ifdef CONFIG_USERFAULTFD 161int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, 162 struct vm_area_struct *dst_vma, 163 unsigned long dst_addr, 164 unsigned long src_addr, 165 enum mcopy_atomic_mode mode, 166 struct page **pagep, 167 bool wp_copy); 168#endif /* CONFIG_USERFAULTFD */ 169bool hugetlb_reserve_pages(struct inode *inode, long from, long to, 170 struct vm_area_struct *vma, 171 vm_flags_t vm_flags); 172long hugetlb_unreserve_pages(struct inode *inode, long start, long end, 173 long freed); 174bool isolate_hugetlb(struct folio *folio, struct list_head *list); 175int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison); 176int get_huge_page_for_hwpoison(unsigned long pfn, int flags, 177 bool *migratable_cleared); 178void folio_putback_active_hugetlb(struct folio *folio); 179void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason); 180void free_huge_page(struct page *page); 181void hugetlb_fix_reserve_counts(struct inode *inode); 182extern struct mutex *hugetlb_fault_mutex_table; 183u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx); 184 185pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, 186 unsigned long addr, pud_t *pud); 187 188struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); 189 190extern int sysctl_hugetlb_shm_group; 191extern struct list_head huge_boot_pages; 192 193/* arch callbacks */ 194 195pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, 196 unsigned long addr, unsigned long sz); 197/* 198 * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE. 199 * Returns the pte_t* if found, or NULL if the address is not mapped. 200 * 201 * IMPORTANT: we should normally not directly call this function, instead 202 * this is only a common interface to implement arch-specific 203 * walker. Please use hugetlb_walk() instead, because that will attempt to 204 * verify the locking for you. 205 * 206 * Since this function will walk all the pgtable pages (including not only 207 * high-level pgtable page, but also PUD entry that can be unshared 208 * concurrently for VM_SHARED), the caller of this function should be 209 * responsible of its thread safety. One can follow this rule: 210 * 211 * (1) For private mappings: pmd unsharing is not possible, so holding the 212 * mmap_lock for either read or write is sufficient. Most callers 213 * already hold the mmap_lock, so normally, no special action is 214 * required. 215 * 216 * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged 217 * pgtable page can go away from under us! It can be done by a pmd 218 * unshare with a follow up munmap() on the other process), then we 219 * need either: 220 * 221 * (2.1) hugetlb vma lock read or write held, to make sure pmd unshare 222 * won't happen upon the range (it also makes sure the pte_t we 223 * read is the right and stable one), or, 224 * 225 * (2.2) hugetlb mapping i_mmap_rwsem lock held read or write, to make 226 * sure even if unshare happened the racy unmap() will wait until 227 * i_mmap_rwsem is released. 228 * 229 * Option (2.1) is the safest, which guarantees pte stability from pmd 230 * sharing pov, until the vma lock released. Option (2.2) doesn't protect 231 * a concurrent pmd unshare, but it makes sure the pgtable page is safe to 232 * access. 233 */ 234pte_t *huge_pte_offset(struct mm_struct *mm, 235 unsigned long addr, unsigned long sz); 236unsigned long hugetlb_mask_last_page(struct hstate *h); 237int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, 238 unsigned long addr, pte_t *ptep); 239void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, 240 unsigned long *start, unsigned long *end); 241 242void hugetlb_vma_lock_read(struct vm_area_struct *vma); 243void hugetlb_vma_unlock_read(struct vm_area_struct *vma); 244void hugetlb_vma_lock_write(struct vm_area_struct *vma); 245void hugetlb_vma_unlock_write(struct vm_area_struct *vma); 246int hugetlb_vma_trylock_write(struct vm_area_struct *vma); 247void hugetlb_vma_assert_locked(struct vm_area_struct *vma); 248void hugetlb_vma_lock_release(struct kref *kref); 249 250int pmd_huge(pmd_t pmd); 251int pud_huge(pud_t pud); 252long hugetlb_change_protection(struct vm_area_struct *vma, 253 unsigned long address, unsigned long end, pgprot_t newprot, 254 unsigned long cp_flags); 255 256bool is_hugetlb_entry_migration(pte_t pte); 257void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); 258 259#else /* !CONFIG_HUGETLB_PAGE */ 260 261static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma) 262{ 263} 264 265static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma) 266{ 267} 268 269static inline unsigned long hugetlb_total_pages(void) 270{ 271 return 0; 272} 273 274static inline struct address_space *hugetlb_page_mapping_lock_write( 275 struct page *hpage) 276{ 277 return NULL; 278} 279 280static inline int huge_pmd_unshare(struct mm_struct *mm, 281 struct vm_area_struct *vma, 282 unsigned long addr, pte_t *ptep) 283{ 284 return 0; 285} 286 287static inline void adjust_range_if_pmd_sharing_possible( 288 struct vm_area_struct *vma, 289 unsigned long *start, unsigned long *end) 290{ 291} 292 293static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma, 294 unsigned long address, unsigned int flags) 295{ 296 BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/ 297} 298 299static inline long follow_hugetlb_page(struct mm_struct *mm, 300 struct vm_area_struct *vma, struct page **pages, 301 struct vm_area_struct **vmas, unsigned long *position, 302 unsigned long *nr_pages, long i, unsigned int flags, 303 int *nonblocking) 304{ 305 BUG(); 306 return 0; 307} 308 309static inline int copy_hugetlb_page_range(struct mm_struct *dst, 310 struct mm_struct *src, 311 struct vm_area_struct *dst_vma, 312 struct vm_area_struct *src_vma) 313{ 314 BUG(); 315 return 0; 316} 317 318static inline int move_hugetlb_page_tables(struct vm_area_struct *vma, 319 struct vm_area_struct *new_vma, 320 unsigned long old_addr, 321 unsigned long new_addr, 322 unsigned long len) 323{ 324 BUG(); 325 return 0; 326} 327 328static inline void hugetlb_report_meminfo(struct seq_file *m) 329{ 330} 331 332static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid) 333{ 334 return 0; 335} 336 337static inline void hugetlb_show_meminfo_node(int nid) 338{ 339} 340 341static inline int prepare_hugepage_range(struct file *file, 342 unsigned long addr, unsigned long len) 343{ 344 return -EINVAL; 345} 346 347static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma) 348{ 349} 350 351static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma) 352{ 353} 354 355static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma) 356{ 357} 358 359static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma) 360{ 361} 362 363static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma) 364{ 365 return 1; 366} 367 368static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma) 369{ 370} 371 372static inline int pmd_huge(pmd_t pmd) 373{ 374 return 0; 375} 376 377static inline int pud_huge(pud_t pud) 378{ 379 return 0; 380} 381 382static inline int is_hugepage_only_range(struct mm_struct *mm, 383 unsigned long addr, unsigned long len) 384{ 385 return 0; 386} 387 388static inline void hugetlb_free_pgd_range(struct mmu_gather *tlb, 389 unsigned long addr, unsigned long end, 390 unsigned long floor, unsigned long ceiling) 391{ 392 BUG(); 393} 394 395#ifdef CONFIG_USERFAULTFD 396static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, 397 pte_t *dst_pte, 398 struct vm_area_struct *dst_vma, 399 unsigned long dst_addr, 400 unsigned long src_addr, 401 enum mcopy_atomic_mode mode, 402 struct page **pagep, 403 bool wp_copy) 404{ 405 BUG(); 406 return 0; 407} 408#endif /* CONFIG_USERFAULTFD */ 409 410static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, 411 unsigned long sz) 412{ 413 return NULL; 414} 415 416static inline bool isolate_hugetlb(struct folio *folio, struct list_head *list) 417{ 418 return false; 419} 420 421static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison) 422{ 423 return 0; 424} 425 426static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags, 427 bool *migratable_cleared) 428{ 429 return 0; 430} 431 432static inline void folio_putback_active_hugetlb(struct folio *folio) 433{ 434} 435 436static inline void move_hugetlb_state(struct folio *old_folio, 437 struct folio *new_folio, int reason) 438{ 439} 440 441static inline long hugetlb_change_protection( 442 struct vm_area_struct *vma, unsigned long address, 443 unsigned long end, pgprot_t newprot, 444 unsigned long cp_flags) 445{ 446 return 0; 447} 448 449static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, 450 struct vm_area_struct *vma, unsigned long start, 451 unsigned long end, struct page *ref_page, 452 zap_flags_t zap_flags) 453{ 454 BUG(); 455} 456 457static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, 458 struct vm_area_struct *vma, unsigned long address, 459 unsigned int flags) 460{ 461 BUG(); 462 return 0; 463} 464 465static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } 466 467#endif /* !CONFIG_HUGETLB_PAGE */ 468/* 469 * hugepages at page global directory. If arch support 470 * hugepages at pgd level, they need to define this. 471 */ 472#ifndef pgd_huge 473#define pgd_huge(x) 0 474#endif 475#ifndef p4d_huge 476#define p4d_huge(x) 0 477#endif 478 479#ifndef pgd_write 480static inline int pgd_write(pgd_t pgd) 481{ 482 BUG(); 483 return 0; 484} 485#endif 486 487#define HUGETLB_ANON_FILE "anon_hugepage" 488 489enum { 490 /* 491 * The file will be used as an shm file so shmfs accounting rules 492 * apply 493 */ 494 HUGETLB_SHMFS_INODE = 1, 495 /* 496 * The file is being created on the internal vfs mount and shmfs 497 * accounting rules do not apply 498 */ 499 HUGETLB_ANONHUGE_INODE = 2, 500}; 501 502#ifdef CONFIG_HUGETLBFS 503struct hugetlbfs_sb_info { 504 long max_inodes; /* inodes allowed */ 505 long free_inodes; /* inodes free */ 506 spinlock_t stat_lock; 507 struct hstate *hstate; 508 struct hugepage_subpool *spool; 509 kuid_t uid; 510 kgid_t gid; 511 umode_t mode; 512}; 513 514static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) 515{ 516 return sb->s_fs_info; 517} 518 519struct hugetlbfs_inode_info { 520 struct shared_policy policy; 521 struct inode vfs_inode; 522 unsigned int seals; 523}; 524 525static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) 526{ 527 return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); 528} 529 530extern const struct file_operations hugetlbfs_file_operations; 531extern const struct vm_operations_struct hugetlb_vm_ops; 532struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, 533 int creat_flags, int page_size_log); 534 535static inline bool is_file_hugepages(struct file *file) 536{ 537 if (file->f_op == &hugetlbfs_file_operations) 538 return true; 539 540 return is_file_shm_hugepages(file); 541} 542 543static inline struct hstate *hstate_inode(struct inode *i) 544{ 545 return HUGETLBFS_SB(i->i_sb)->hstate; 546} 547#else /* !CONFIG_HUGETLBFS */ 548 549#define is_file_hugepages(file) false 550static inline struct file * 551hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, 552 int creat_flags, int page_size_log) 553{ 554 return ERR_PTR(-ENOSYS); 555} 556 557static inline struct hstate *hstate_inode(struct inode *i) 558{ 559 return NULL; 560} 561#endif /* !CONFIG_HUGETLBFS */ 562 563#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA 564unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 565 unsigned long len, unsigned long pgoff, 566 unsigned long flags); 567#endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ 568 569unsigned long 570generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, 571 unsigned long len, unsigned long pgoff, 572 unsigned long flags); 573 574/* 575 * huegtlb page specific state flags. These flags are located in page.private 576 * of the hugetlb head page. Functions created via the below macros should be 577 * used to manipulate these flags. 578 * 579 * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at 580 * allocation time. Cleared when page is fully instantiated. Free 581 * routine checks flag to restore a reservation on error paths. 582 * Synchronization: Examined or modified by code that knows it has 583 * the only reference to page. i.e. After allocation but before use 584 * or when the page is being freed. 585 * HPG_migratable - Set after a newly allocated page is added to the page 586 * cache and/or page tables. Indicates the page is a candidate for 587 * migration. 588 * Synchronization: Initially set after new page allocation with no 589 * locking. When examined and modified during migration processing 590 * (isolate, migrate, putback) the hugetlb_lock is held. 591 * HPG_temporary - Set on a page that is temporarily allocated from the buddy 592 * allocator. Typically used for migration target pages when no pages 593 * are available in the pool. The hugetlb free page path will 594 * immediately free pages with this flag set to the buddy allocator. 595 * Synchronization: Can be set after huge page allocation from buddy when 596 * code knows it has only reference. All other examinations and 597 * modifications require hugetlb_lock. 598 * HPG_freed - Set when page is on the free lists. 599 * Synchronization: hugetlb_lock held for examination and modification. 600 * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed. 601 * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page 602 * that is not tracked by raw_hwp_page list. 603 */ 604enum hugetlb_page_flags { 605 HPG_restore_reserve = 0, 606 HPG_migratable, 607 HPG_temporary, 608 HPG_freed, 609 HPG_vmemmap_optimized, 610 HPG_raw_hwp_unreliable, 611 __NR_HPAGEFLAGS, 612}; 613 614/* 615 * Macros to create test, set and clear function definitions for 616 * hugetlb specific page flags. 617 */ 618#ifdef CONFIG_HUGETLB_PAGE 619#define TESTHPAGEFLAG(uname, flname) \ 620static __always_inline \ 621bool folio_test_hugetlb_##flname(struct folio *folio) \ 622 { void *private = &folio->private; \ 623 return test_bit(HPG_##flname, private); \ 624 } \ 625static inline int HPage##uname(struct page *page) \ 626 { return test_bit(HPG_##flname, &(page->private)); } 627 628#define SETHPAGEFLAG(uname, flname) \ 629static __always_inline \ 630void folio_set_hugetlb_##flname(struct folio *folio) \ 631 { void *private = &folio->private; \ 632 set_bit(HPG_##flname, private); \ 633 } \ 634static inline void SetHPage##uname(struct page *page) \ 635 { set_bit(HPG_##flname, &(page->private)); } 636 637#define CLEARHPAGEFLAG(uname, flname) \ 638static __always_inline \ 639void folio_clear_hugetlb_##flname(struct folio *folio) \ 640 { void *private = &folio->private; \ 641 clear_bit(HPG_##flname, private); \ 642 } \ 643static inline void ClearHPage##uname(struct page *page) \ 644 { clear_bit(HPG_##flname, &(page->private)); } 645#else 646#define TESTHPAGEFLAG(uname, flname) \ 647static inline bool \ 648folio_test_hugetlb_##flname(struct folio *folio) \ 649 { return 0; } \ 650static inline int HPage##uname(struct page *page) \ 651 { return 0; } 652 653#define SETHPAGEFLAG(uname, flname) \ 654static inline void \ 655folio_set_hugetlb_##flname(struct folio *folio) \ 656 { } \ 657static inline void SetHPage##uname(struct page *page) \ 658 { } 659 660#define CLEARHPAGEFLAG(uname, flname) \ 661static inline void \ 662folio_clear_hugetlb_##flname(struct folio *folio) \ 663 { } \ 664static inline void ClearHPage##uname(struct page *page) \ 665 { } 666#endif 667 668#define HPAGEFLAG(uname, flname) \ 669 TESTHPAGEFLAG(uname, flname) \ 670 SETHPAGEFLAG(uname, flname) \ 671 CLEARHPAGEFLAG(uname, flname) \ 672 673/* 674 * Create functions associated with hugetlb page flags 675 */ 676HPAGEFLAG(RestoreReserve, restore_reserve) 677HPAGEFLAG(Migratable, migratable) 678HPAGEFLAG(Temporary, temporary) 679HPAGEFLAG(Freed, freed) 680HPAGEFLAG(VmemmapOptimized, vmemmap_optimized) 681HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable) 682 683#ifdef CONFIG_HUGETLB_PAGE 684 685#define HSTATE_NAME_LEN 32 686/* Defines one hugetlb page size */ 687struct hstate { 688 struct mutex resize_lock; 689 int next_nid_to_alloc; 690 int next_nid_to_free; 691 unsigned int order; 692 unsigned int demote_order; 693 unsigned long mask; 694 unsigned long max_huge_pages; 695 unsigned long nr_huge_pages; 696 unsigned long free_huge_pages; 697 unsigned long resv_huge_pages; 698 unsigned long surplus_huge_pages; 699 unsigned long nr_overcommit_huge_pages; 700 struct list_head hugepage_activelist; 701 struct list_head hugepage_freelists[MAX_NUMNODES]; 702 unsigned int max_huge_pages_node[MAX_NUMNODES]; 703 unsigned int nr_huge_pages_node[MAX_NUMNODES]; 704 unsigned int free_huge_pages_node[MAX_NUMNODES]; 705 unsigned int surplus_huge_pages_node[MAX_NUMNODES]; 706#ifdef CONFIG_CGROUP_HUGETLB 707 /* cgroup control files */ 708 struct cftype cgroup_files_dfl[8]; 709 struct cftype cgroup_files_legacy[10]; 710#endif 711 char name[HSTATE_NAME_LEN]; 712}; 713 714struct huge_bootmem_page { 715 struct list_head list; 716 struct hstate *hstate; 717}; 718 719int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); 720struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, 721 unsigned long addr, int avoid_reserve); 722struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, 723 nodemask_t *nmask, gfp_t gfp_mask); 724struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma, 725 unsigned long address); 726int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping, 727 pgoff_t idx); 728void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma, 729 unsigned long address, struct folio *folio); 730 731/* arch callback */ 732int __init __alloc_bootmem_huge_page(struct hstate *h, int nid); 733int __init alloc_bootmem_huge_page(struct hstate *h, int nid); 734bool __init hugetlb_node_alloc_supported(void); 735 736void __init hugetlb_add_hstate(unsigned order); 737bool __init arch_hugetlb_valid_size(unsigned long size); 738struct hstate *size_to_hstate(unsigned long size); 739 740#ifndef HUGE_MAX_HSTATE 741#define HUGE_MAX_HSTATE 1 742#endif 743 744extern struct hstate hstates[HUGE_MAX_HSTATE]; 745extern unsigned int default_hstate_idx; 746 747#define default_hstate (hstates[default_hstate_idx]) 748 749static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) 750{ 751 return folio->_hugetlb_subpool; 752} 753 754/* 755 * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool 756 */ 757static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) 758{ 759 return hugetlb_folio_subpool(page_folio(hpage)); 760} 761 762static inline void hugetlb_set_folio_subpool(struct folio *folio, 763 struct hugepage_subpool *subpool) 764{ 765 folio->_hugetlb_subpool = subpool; 766} 767 768static inline void hugetlb_set_page_subpool(struct page *hpage, 769 struct hugepage_subpool *subpool) 770{ 771 hugetlb_set_folio_subpool(page_folio(hpage), subpool); 772} 773 774static inline struct hstate *hstate_file(struct file *f) 775{ 776 return hstate_inode(file_inode(f)); 777} 778 779static inline struct hstate *hstate_sizelog(int page_size_log) 780{ 781 if (!page_size_log) 782 return &default_hstate; 783 784 if (page_size_log < BITS_PER_LONG) 785 return size_to_hstate(1UL << page_size_log); 786 787 return NULL; 788} 789 790static inline struct hstate *hstate_vma(struct vm_area_struct *vma) 791{ 792 return hstate_file(vma->vm_file); 793} 794 795static inline unsigned long huge_page_size(const struct hstate *h) 796{ 797 return (unsigned long)PAGE_SIZE << h->order; 798} 799 800extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma); 801 802extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma); 803 804static inline unsigned long huge_page_mask(struct hstate *h) 805{ 806 return h->mask; 807} 808 809static inline unsigned int huge_page_order(struct hstate *h) 810{ 811 return h->order; 812} 813 814static inline unsigned huge_page_shift(struct hstate *h) 815{ 816 return h->order + PAGE_SHIFT; 817} 818 819static inline bool hstate_is_gigantic(struct hstate *h) 820{ 821 return huge_page_order(h) >= MAX_ORDER; 822} 823 824static inline unsigned int pages_per_huge_page(const struct hstate *h) 825{ 826 return 1 << h->order; 827} 828 829static inline unsigned int blocks_per_huge_page(struct hstate *h) 830{ 831 return huge_page_size(h) / 512; 832} 833 834#include <asm/hugetlb.h> 835 836#ifndef is_hugepage_only_range 837static inline int is_hugepage_only_range(struct mm_struct *mm, 838 unsigned long addr, unsigned long len) 839{ 840 return 0; 841} 842#define is_hugepage_only_range is_hugepage_only_range 843#endif 844 845#ifndef arch_clear_hugepage_flags 846static inline void arch_clear_hugepage_flags(struct page *page) { } 847#define arch_clear_hugepage_flags arch_clear_hugepage_flags 848#endif 849 850#ifndef arch_make_huge_pte 851static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, 852 vm_flags_t flags) 853{ 854 return pte_mkhuge(entry); 855} 856#endif 857 858static inline struct hstate *folio_hstate(struct folio *folio) 859{ 860 VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio); 861 return size_to_hstate(folio_size(folio)); 862} 863 864static inline struct hstate *page_hstate(struct page *page) 865{ 866 return folio_hstate(page_folio(page)); 867} 868 869static inline unsigned hstate_index_to_shift(unsigned index) 870{ 871 return hstates[index].order + PAGE_SHIFT; 872} 873 874static inline int hstate_index(struct hstate *h) 875{ 876 return h - hstates; 877} 878 879extern int dissolve_free_huge_page(struct page *page); 880extern int dissolve_free_huge_pages(unsigned long start_pfn, 881 unsigned long end_pfn); 882 883#ifdef CONFIG_MEMORY_FAILURE 884extern void folio_clear_hugetlb_hwpoison(struct folio *folio); 885#else 886static inline void folio_clear_hugetlb_hwpoison(struct folio *folio) 887{ 888} 889#endif 890 891#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION 892#ifndef arch_hugetlb_migration_supported 893static inline bool arch_hugetlb_migration_supported(struct hstate *h) 894{ 895 if ((huge_page_shift(h) == PMD_SHIFT) || 896 (huge_page_shift(h) == PUD_SHIFT) || 897 (huge_page_shift(h) == PGDIR_SHIFT)) 898 return true; 899 else 900 return false; 901} 902#endif 903#else 904static inline bool arch_hugetlb_migration_supported(struct hstate *h) 905{ 906 return false; 907} 908#endif 909 910static inline bool hugepage_migration_supported(struct hstate *h) 911{ 912 return arch_hugetlb_migration_supported(h); 913} 914 915/* 916 * Movability check is different as compared to migration check. 917 * It determines whether or not a huge page should be placed on 918 * movable zone or not. Movability of any huge page should be 919 * required only if huge page size is supported for migration. 920 * There won't be any reason for the huge page to be movable if 921 * it is not migratable to start with. Also the size of the huge 922 * page should be large enough to be placed under a movable zone 923 * and still feasible enough to be migratable. Just the presence 924 * in movable zone does not make the migration feasible. 925 * 926 * So even though large huge page sizes like the gigantic ones 927 * are migratable they should not be movable because its not 928 * feasible to migrate them from movable zone. 929 */ 930static inline bool hugepage_movable_supported(struct hstate *h) 931{ 932 if (!hugepage_migration_supported(h)) 933 return false; 934 935 if (hstate_is_gigantic(h)) 936 return false; 937 return true; 938} 939 940/* Movability of hugepages depends on migration support. */ 941static inline gfp_t htlb_alloc_mask(struct hstate *h) 942{ 943 if (hugepage_movable_supported(h)) 944 return GFP_HIGHUSER_MOVABLE; 945 else 946 return GFP_HIGHUSER; 947} 948 949static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) 950{ 951 gfp_t modified_mask = htlb_alloc_mask(h); 952 953 /* Some callers might want to enforce node */ 954 modified_mask |= (gfp_mask & __GFP_THISNODE); 955 956 modified_mask |= (gfp_mask & __GFP_NOWARN); 957 958 return modified_mask; 959} 960 961static inline spinlock_t *huge_pte_lockptr(struct hstate *h, 962 struct mm_struct *mm, pte_t *pte) 963{ 964 if (huge_page_size(h) == PMD_SIZE) 965 return pmd_lockptr(mm, (pmd_t *) pte); 966 VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); 967 return &mm->page_table_lock; 968} 969 970#ifndef hugepages_supported 971/* 972 * Some platform decide whether they support huge pages at boot 973 * time. Some of them, such as powerpc, set HPAGE_SHIFT to 0 974 * when there is no such support 975 */ 976#define hugepages_supported() (HPAGE_SHIFT != 0) 977#endif 978 979void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm); 980 981static inline void hugetlb_count_init(struct mm_struct *mm) 982{ 983 atomic_long_set(&mm->hugetlb_usage, 0); 984} 985 986static inline void hugetlb_count_add(long l, struct mm_struct *mm) 987{ 988 atomic_long_add(l, &mm->hugetlb_usage); 989} 990 991static inline void hugetlb_count_sub(long l, struct mm_struct *mm) 992{ 993 atomic_long_sub(l, &mm->hugetlb_usage); 994} 995 996#ifndef huge_ptep_modify_prot_start 997#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start 998static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, 999 unsigned long addr, pte_t *ptep) 1000{ 1001 return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep); 1002} 1003#endif 1004 1005#ifndef huge_ptep_modify_prot_commit 1006#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit 1007static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, 1008 unsigned long addr, pte_t *ptep, 1009 pte_t old_pte, pte_t pte) 1010{ 1011 set_huge_pte_at(vma->vm_mm, addr, ptep, pte); 1012} 1013#endif 1014 1015#ifdef CONFIG_NUMA 1016void hugetlb_register_node(struct node *node); 1017void hugetlb_unregister_node(struct node *node); 1018#endif 1019 1020#else /* CONFIG_HUGETLB_PAGE */ 1021struct hstate {}; 1022 1023static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio) 1024{ 1025 return NULL; 1026} 1027 1028static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) 1029{ 1030 return NULL; 1031} 1032 1033static inline int isolate_or_dissolve_huge_page(struct page *page, 1034 struct list_head *list) 1035{ 1036 return -ENOMEM; 1037} 1038 1039static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, 1040 unsigned long addr, 1041 int avoid_reserve) 1042{ 1043 return NULL; 1044} 1045 1046static inline struct folio * 1047alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid, 1048 nodemask_t *nmask, gfp_t gfp_mask) 1049{ 1050 return NULL; 1051} 1052 1053static inline struct folio *alloc_hugetlb_folio_vma(struct hstate *h, 1054 struct vm_area_struct *vma, 1055 unsigned long address) 1056{ 1057 return NULL; 1058} 1059 1060static inline int __alloc_bootmem_huge_page(struct hstate *h) 1061{ 1062 return 0; 1063} 1064 1065static inline struct hstate *hstate_file(struct file *f) 1066{ 1067 return NULL; 1068} 1069 1070static inline struct hstate *hstate_sizelog(int page_size_log) 1071{ 1072 return NULL; 1073} 1074 1075static inline struct hstate *hstate_vma(struct vm_area_struct *vma) 1076{ 1077 return NULL; 1078} 1079 1080static inline struct hstate *folio_hstate(struct folio *folio) 1081{ 1082 return NULL; 1083} 1084 1085static inline struct hstate *page_hstate(struct page *page) 1086{ 1087 return NULL; 1088} 1089 1090static inline struct hstate *size_to_hstate(unsigned long size) 1091{ 1092 return NULL; 1093} 1094 1095static inline unsigned long huge_page_size(struct hstate *h) 1096{ 1097 return PAGE_SIZE; 1098} 1099 1100static inline unsigned long huge_page_mask(struct hstate *h) 1101{ 1102 return PAGE_MASK; 1103} 1104 1105static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma) 1106{ 1107 return PAGE_SIZE; 1108} 1109 1110static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma) 1111{ 1112 return PAGE_SIZE; 1113} 1114 1115static inline unsigned int huge_page_order(struct hstate *h) 1116{ 1117 return 0; 1118} 1119 1120static inline unsigned int huge_page_shift(struct hstate *h) 1121{ 1122 return PAGE_SHIFT; 1123} 1124 1125static inline bool hstate_is_gigantic(struct hstate *h) 1126{ 1127 return false; 1128} 1129 1130static inline unsigned int pages_per_huge_page(struct hstate *h) 1131{ 1132 return 1; 1133} 1134 1135static inline unsigned hstate_index_to_shift(unsigned index) 1136{ 1137 return 0; 1138} 1139 1140static inline int hstate_index(struct hstate *h) 1141{ 1142 return 0; 1143} 1144 1145static inline int dissolve_free_huge_page(struct page *page) 1146{ 1147 return 0; 1148} 1149 1150static inline int dissolve_free_huge_pages(unsigned long start_pfn, 1151 unsigned long end_pfn) 1152{ 1153 return 0; 1154} 1155 1156static inline bool hugepage_migration_supported(struct hstate *h) 1157{ 1158 return false; 1159} 1160 1161static inline bool hugepage_movable_supported(struct hstate *h) 1162{ 1163 return false; 1164} 1165 1166static inline gfp_t htlb_alloc_mask(struct hstate *h) 1167{ 1168 return 0; 1169} 1170 1171static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask) 1172{ 1173 return 0; 1174} 1175 1176static inline spinlock_t *huge_pte_lockptr(struct hstate *h, 1177 struct mm_struct *mm, pte_t *pte) 1178{ 1179 return &mm->page_table_lock; 1180} 1181 1182static inline void hugetlb_count_init(struct mm_struct *mm) 1183{ 1184} 1185 1186static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m) 1187{ 1188} 1189 1190static inline void hugetlb_count_sub(long l, struct mm_struct *mm) 1191{ 1192} 1193 1194static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, 1195 unsigned long addr, pte_t *ptep) 1196{ 1197 return *ptep; 1198} 1199 1200static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 1201 pte_t *ptep, pte_t pte) 1202{ 1203} 1204 1205static inline void hugetlb_register_node(struct node *node) 1206{ 1207} 1208 1209static inline void hugetlb_unregister_node(struct node *node) 1210{ 1211} 1212#endif /* CONFIG_HUGETLB_PAGE */ 1213 1214static inline spinlock_t *huge_pte_lock(struct hstate *h, 1215 struct mm_struct *mm, pte_t *pte) 1216{ 1217 spinlock_t *ptl; 1218 1219 ptl = huge_pte_lockptr(h, mm, pte); 1220 spin_lock(ptl); 1221 return ptl; 1222} 1223 1224#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA) 1225extern void __init hugetlb_cma_reserve(int order); 1226#else 1227static inline __init void hugetlb_cma_reserve(int order) 1228{ 1229} 1230#endif 1231 1232#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE 1233static inline bool hugetlb_pmd_shared(pte_t *pte) 1234{ 1235 return page_count(virt_to_page(pte)) > 1; 1236} 1237#else 1238static inline bool hugetlb_pmd_shared(pte_t *pte) 1239{ 1240 return false; 1241} 1242#endif 1243 1244bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr); 1245 1246#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE 1247/* 1248 * ARCHes with special requirements for evicting HUGETLB backing TLB entries can 1249 * implement this. 1250 */ 1251#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) 1252#endif 1253 1254static inline bool __vma_shareable_lock(struct vm_area_struct *vma) 1255{ 1256 return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data; 1257} 1258 1259/* 1260 * Safe version of huge_pte_offset() to check the locks. See comments 1261 * above huge_pte_offset(). 1262 */ 1263static inline pte_t * 1264hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz) 1265{ 1266#if defined(CONFIG_HUGETLB_PAGE) && \ 1267 defined(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) && defined(CONFIG_LOCKDEP) 1268 struct hugetlb_vma_lock *vma_lock = vma->vm_private_data; 1269 1270 /* 1271 * If pmd sharing possible, locking needed to safely walk the 1272 * hugetlb pgtables. More information can be found at the comment 1273 * above huge_pte_offset() in the same file. 1274 * 1275 * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP. 1276 */ 1277 if (__vma_shareable_lock(vma)) 1278 WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) && 1279 !lockdep_is_held( 1280 &vma->vm_file->f_mapping->i_mmap_rwsem)); 1281#endif 1282 return huge_pte_offset(vma->vm_mm, addr, sz); 1283} 1284 1285#endif /* _LINUX_HUGETLB_H */