/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_HUGETLB_H
#define _LINUX_HUGETLB_H

#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/fs.h>
#include <linux/hugetlb_inline.h>
#include <linux/cgroup.h>
#include <linux/page_ref.h>
#include <linux/list.h>
#include <linux/kref.h>
#include <linux/pgtable.h>
#include <linux/gfp.h>
#include <linux/userfaultfd_k.h>
#include <linux/nodemask.h>

struct ctl_table;
struct user_struct;
struct mmu_gather;
struct node;

void free_huge_folio(struct folio *folio);

#ifdef CONFIG_HUGETLB_PAGE

#include <linux/pagemap.h>
#include <linux/shm.h>
#include <asm/tlbflush.h>

/*
 * For a HugeTLB page, there is more metadata to save in the struct page. But
 * the head struct page cannot meet our needs, so we have to abuse other tail
 * struct pages to store the metadata.
 */
#define __NR_USED_SUBPAGE 3

struct hugepage_subpool {
	spinlock_t lock;
	long count;
	long max_hpages;	/* Maximum huge pages or -1 if no maximum. */
	long used_hpages;	/* Used count against maximum, includes */
				/* both allocated and reserved pages. */
	struct hstate *hstate;
	long min_hpages;	/* Minimum huge pages or -1 if no minimum. */
	long rsv_hpages;	/* Pages reserved against global pool to */
				/* satisfy minimum size. */
};

struct resv_map {
	struct kref refs;
	spinlock_t lock;
	struct list_head regions;
	long adds_in_progress;
	struct list_head region_cache;
	long region_cache_count;
	struct rw_semaphore rw_sema;
#ifdef CONFIG_CGROUP_HUGETLB
	/*
	 * On private mappings, the counter to uncharge reservations is stored
	 * here. If these fields are 0, then either the mapping is shared, or
	 * cgroup accounting is disabled for this resv_map.
	 */
	struct page_counter *reservation_counter;
	unsigned long pages_per_hpage;
	struct cgroup_subsys_state *css;
#endif
};

/*
 * Region tracking -- allows tracking of reservations and instantiated pages
 * across the pages in a mapping.
 *
 * The region data structures are embedded into a resv_map and protected
 * by a resv_map's lock.  The set of regions within the resv_map represent
 * reservations for huge pages, or huge pages that have already been
 * instantiated within the map.  The from and to elements are huge page
 * indices into the associated mapping.  from indicates the starting index
 * of the region.  to represents the first index past the end of the region.
 *
 * For example, a file region structure with from == 0 and to == 4 represents
 * four huge pages in a mapping.  It is important to note that the to element
 * represents the first element past the end of the region.  This is used in
 * arithmetic as 4(to) - 0(from) = 4 huge pages in the region.
 *
 * Interval notation of the form [from, to) will be used to indicate that
 * the endpoint from is inclusive and to is exclusive.
 */
struct file_region {
	struct list_head link;
	long from;
	long to;
#ifdef CONFIG_CGROUP_HUGETLB
	/*
	 * On shared mappings, each reserved region appears as a struct
	 * file_region in resv_map.  These fields hold the info needed to
	 * uncharge each reservation.
	 */
	struct page_counter *reservation_counter;
	struct cgroup_subsys_state *css;
#endif
};

struct hugetlb_vma_lock {
	struct kref refs;
	struct rw_semaphore rw_sema;
	struct vm_area_struct *vma;
};

extern struct resv_map *resv_map_alloc(void);
void resv_map_release(struct kref *ref);

extern spinlock_t hugetlb_lock;
extern int hugetlb_max_hstate __read_mostly;
#define for_each_hstate(h) \
	for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++)

struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
						long min_hpages);
void hugepage_put_subpool(struct hugepage_subpool *spool);

void hugetlb_dup_vma_private(struct vm_area_struct *vma);
void clear_vma_resv_huge_pages(struct vm_area_struct *vma);
int move_hugetlb_page_tables(struct vm_area_struct *vma,
			     struct vm_area_struct *new_vma,
			     unsigned long old_addr, unsigned long new_addr,
			     unsigned long len);
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
			    struct vm_area_struct *, struct vm_area_struct *);
void unmap_hugepage_range(struct vm_area_struct *,
			  unsigned long start, unsigned long end,
			  struct folio *, zap_flags_t);
void __unmap_hugepage_range(struct mmu_gather *tlb,
			    struct vm_area_struct *vma,
			    unsigned long start, unsigned long end,
			    struct folio *, zap_flags_t zap_flags);
void hugetlb_report_meminfo(struct seq_file *);
int hugetlb_report_node_meminfo(char *buf, int len, int nid);
void hugetlb_show_meminfo_node(int nid);
unsigned long hugetlb_total_pages(void);
vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
			 unsigned long address, unsigned int flags);
#ifdef CONFIG_USERFAULTFD
int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
			     struct vm_area_struct *dst_vma,
			     unsigned long dst_addr,
			     unsigned long src_addr,
			     uffd_flags_t flags,
			     struct folio **foliop);
#endif /* CONFIG_USERFAULTFD */
long hugetlb_reserve_pages(struct inode *inode, long from, long to,
			   struct vm_area_desc *desc, vm_flags_t vm_flags);
long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
			     long freed);
bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list);
int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison);
int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
			       bool *migratable_cleared);
void folio_putback_hugetlb(struct folio *folio);
void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
void hugetlb_fix_reserve_counts(struct inode *inode);
extern struct mutex *hugetlb_fault_mutex_table;
u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);

pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, pud_t *pud);
bool hugetlbfs_pagecache_present(struct hstate *h,
				 struct vm_area_struct *vma,
				 unsigned long address);

struct address_space *hugetlb_folio_mapping_lock_write(struct folio *folio);

extern int sysctl_hugetlb_shm_group __read_mostly;
extern struct list_head huge_boot_pages[MAX_NUMNODES];

void hugetlb_bootmem_alloc(void);
bool hugetlb_bootmem_allocated(void);
extern nodemask_t hugetlb_bootmem_nodes;
void hugetlb_bootmem_set_nodes(void);

/* arch callbacks */

#ifndef CONFIG_HIGHPTE
/*
 * pte_offset_huge() and pte_alloc_huge() are helpers for those architectures
 * which may go down to the lowest PTE level in their huge_pte_offset() and
 * huge_pte_alloc(): to avoid reliance on pte_offset_map() without pte_unmap().
 */
static inline pte_t *pte_offset_huge(pmd_t *pmd, unsigned long address)
{
	return pte_offset_kernel(pmd, address);
}
static inline pte_t *pte_alloc_huge(struct mm_struct *mm, pmd_t *pmd,
				    unsigned long address)
{
	return pte_alloc(mm, pmd) ? NULL : pte_offset_huge(pmd, address);
}
#endif

pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
		      unsigned long addr, unsigned long sz);
/*
 * huge_pte_offset(): Walk the hugetlb pgtable until the last level PTE.
 * Returns the pte_t* if found, or NULL if the address is not mapped.
 *
 * IMPORTANT: we should normally not call this function directly; it is only
 * a common interface for implementing arch-specific walkers.  Please use
 * hugetlb_walk() instead, because that will attempt to verify the locking
 * for you.
 *
 * Since this function will walk all the pgtable pages (including not only
 * high-level pgtable pages, but also PUD entries that can be unshared
 * concurrently for VM_SHARED), the caller of this function is responsible
 * for its thread safety.  One can follow this rule:
 *
 * (1) For private mappings: pmd unsharing is not possible, so holding the
 *     mmap_lock for either read or write is sufficient.  Most callers
 *     already hold the mmap_lock, so normally, no special action is
 *     required.
 *
 * (2) For shared mappings: pmd unsharing is possible (so the PUD-ranged
 *     pgtable page can go away from under us!  It can be done by a pmd
 *     unshare with a follow-up munmap() on the other process), then we
 *     need either:
 *
 *     (2.1) the hugetlb vma lock held for read or write, to make sure pmd
 *           unshare won't happen upon the range (it also makes sure the
 *           pte_t we read is the right and stable one), or,
 *
 *     (2.2) the hugetlb mapping's i_mmap_rwsem held for read or write, to
 *           make sure that even if an unshare happened the racy unmap()
 *           will wait until i_mmap_rwsem is released.
 *
 * Option (2.1) is the safest, as it guarantees pte stability from the pmd
 * sharing point of view until the vma lock is released.  Option (2.2)
 * doesn't protect against a concurrent pmd unshare, but it makes sure the
 * pgtable page is safe to access.
 */
pte_t *huge_pte_offset(struct mm_struct *mm,
		       unsigned long addr, unsigned long sz);
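/*
 * An illustrative sketch of rule (2.1) above (not taken verbatim from any
 * caller; "addr" and the surrounding context are placeholders): a walker of
 * a shared hugetlb VMA takes the vma lock, walks, then locks the entry:
 *
 *	struct hstate *h = hstate_vma(vma);
 *	pte_t *ptep;
 *
 *	hugetlb_vma_lock_read(vma);
 *	ptep = hugetlb_walk(vma, addr & huge_page_mask(h), huge_page_size(h));
 *	if (ptep) {
 *		spinlock_t *ptl = huge_pte_lock(h, vma->vm_mm, ptep);
 *		... inspect or modify the entry ...
 *		spin_unlock(ptl);
 *	}
 *	hugetlb_vma_unlock_read(vma);
 *
 * hugetlb_walk() (defined at the bottom of this header) is the preferred
 * entry point; with lockdep enabled it asserts the locking described above.
 */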
unsigned long hugetlb_mask_last_page(struct hstate *h);
int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
		     unsigned long addr, pte_t *ptep);
void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
					  unsigned long *start, unsigned long *end);

extern void __hugetlb_zap_begin(struct vm_area_struct *vma,
				unsigned long *begin, unsigned long *end);
extern void __hugetlb_zap_end(struct vm_area_struct *vma,
			      struct zap_details *details);

static inline void hugetlb_zap_begin(struct vm_area_struct *vma,
				     unsigned long *start, unsigned long *end)
{
	if (is_vm_hugetlb_page(vma))
		__hugetlb_zap_begin(vma, start, end);
}

static inline void hugetlb_zap_end(struct vm_area_struct *vma,
				   struct zap_details *details)
{
	if (is_vm_hugetlb_page(vma))
		__hugetlb_zap_end(vma, details);
}

void hugetlb_vma_lock_read(struct vm_area_struct *vma);
void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
void hugetlb_vma_lock_write(struct vm_area_struct *vma);
void hugetlb_vma_unlock_write(struct vm_area_struct *vma);
int hugetlb_vma_trylock_write(struct vm_area_struct *vma);
void hugetlb_vma_assert_locked(struct vm_area_struct *vma);
void hugetlb_vma_lock_release(struct kref *kref);
long hugetlb_change_protection(struct vm_area_struct *vma,
		unsigned long address, unsigned long end, pgprot_t newprot,
		unsigned long cp_flags);
void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
void fixup_hugetlb_reservations(struct vm_area_struct *vma);
void hugetlb_split(struct vm_area_struct *vma, unsigned long addr);
int hugetlb_vma_lock_alloc(struct vm_area_struct *vma);

#else /* !CONFIG_HUGETLB_PAGE */

static inline void hugetlb_dup_vma_private(struct vm_area_struct *vma)
{
}

static inline void clear_vma_resv_huge_pages(struct vm_area_struct *vma)
{
}

static inline unsigned long hugetlb_total_pages(void)
{
	return 0;
}

static inline struct address_space *hugetlb_folio_mapping_lock_write(
							struct folio *folio)
{
	return NULL;
}

static inline int huge_pmd_unshare(struct mm_struct *mm,
				   struct vm_area_struct *vma,
				   unsigned long addr, pte_t *ptep)
{
	return 0;
}

static inline void adjust_range_if_pmd_sharing_possible(
				struct vm_area_struct *vma,
				unsigned long *start, unsigned long *end)
{
}

static inline void hugetlb_zap_begin(
				struct vm_area_struct *vma,
				unsigned long *start, unsigned long *end)
{
}

static inline void hugetlb_zap_end(
				struct vm_area_struct *vma,
				struct zap_details *details)
{
}

static inline int copy_hugetlb_page_range(struct mm_struct *dst,
					  struct mm_struct *src,
					  struct vm_area_struct *dst_vma,
					  struct vm_area_struct *src_vma)
{
	BUG();
	return 0;
}

static inline int move_hugetlb_page_tables(struct vm_area_struct *vma,
					   struct vm_area_struct *new_vma,
					   unsigned long old_addr,
					   unsigned long new_addr,
					   unsigned long len)
{
	BUG();
	return 0;
}

static inline void hugetlb_report_meminfo(struct seq_file *m)
{
}

static inline int hugetlb_report_node_meminfo(char *buf, int len, int nid)
{
	return 0;
}

static inline void hugetlb_show_meminfo_node(int nid)
{
}

static inline void hugetlb_vma_lock_read(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_unlock_read(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_lock_write(struct vm_area_struct *vma)
{
}

static inline void hugetlb_vma_unlock_write(struct vm_area_struct *vma)
{
}

static inline int hugetlb_vma_trylock_write(struct vm_area_struct *vma)
{
	return 1;
}

static inline void hugetlb_vma_assert_locked(struct vm_area_struct *vma)
{
}

static inline int is_hugepage_only_range(struct mm_struct *mm,
					unsigned long addr, unsigned long len)
{
	return 0;
}

#ifdef CONFIG_USERFAULTFD
static inline int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
					   struct vm_area_struct *dst_vma,
					   unsigned long dst_addr,
					   unsigned long src_addr,
					   uffd_flags_t flags,
					   struct folio **foliop)
{
	BUG();
	return 0;
}
#endif /* CONFIG_USERFAULTFD */

static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
					unsigned long sz)
{
	return NULL;
}

static inline bool folio_isolate_hugetlb(struct folio *folio, struct list_head *list)
{
	return false;
}

static inline int get_hwpoison_hugetlb_folio(struct folio *folio, bool *hugetlb, bool unpoison)
{
	return 0;
}

static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
					bool *migratable_cleared)
{
	return 0;
}

static inline void folio_putback_hugetlb(struct folio *folio)
{
}

static inline void move_hugetlb_state(struct folio *old_folio,
				      struct folio *new_folio, int reason)
{
}

static inline long hugetlb_change_protection(
			struct vm_area_struct *vma, unsigned long address,
			unsigned long end, pgprot_t newprot,
			unsigned long cp_flags)
{
	return 0;
}

static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
			struct vm_area_struct *vma, unsigned long start,
			unsigned long end, struct folio *folio,
			zap_flags_t zap_flags)
{
	BUG();
}

static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
			struct vm_area_struct *vma, unsigned long address,
			unsigned int flags)
{
	BUG();
	return 0;
}

static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }

static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma)
{
}

static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {}

static inline int hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
{
	return 0;
}

#endif /* !CONFIG_HUGETLB_PAGE */

#ifndef pgd_write
static inline int pgd_write(pgd_t pgd)
{
	BUG();
	return 0;
}
#endif

#define HUGETLB_ANON_FILE "anon_hugepage"

enum {
	/*
	 * The file will be used as a shm file so shmfs accounting rules
	 * apply
	 */
	HUGETLB_SHMFS_INODE = 1,
	/*
	 * The file is being created on the internal vfs mount and shmfs
	 * accounting rules do not apply
	 */
	HUGETLB_ANONHUGE_INODE = 2,
};

#ifdef CONFIG_HUGETLBFS
struct hugetlbfs_sb_info {
	long max_inodes;	/* inodes allowed */
	long free_inodes;	/* inodes free */
	spinlock_t stat_lock;
	struct hstate *hstate;
	struct hugepage_subpool *spool;
	kuid_t uid;
	kgid_t gid;
	umode_t mode;
};

static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
{
	return sb->s_fs_info;
}

struct hugetlbfs_inode_info {
	struct inode vfs_inode;
	unsigned int seals;
};

static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode)
{
	return container_of(inode, struct hugetlbfs_inode_info, vfs_inode);
}

extern const struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct,
				int creat_flags, int page_size_log);

static inline bool is_file_hugepages(const struct file *file)
{
	return file->f_op->fop_flags & FOP_HUGE_PAGES;
}

static inline struct hstate *hstate_inode(struct inode *i)
{
	return HUGETLBFS_SB(i->i_sb)->hstate;
}
#else /* !CONFIG_HUGETLBFS */

#define is_file_hugepages(file) false
static inline struct file *
hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag,
		int creat_flags, int page_size_log)
{
	return ERR_PTR(-ENOSYS);
}

static inline struct hstate *hstate_inode(struct inode *i)
{
	return NULL;
}
#endif /* !CONFIG_HUGETLBFS */

unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
			  unsigned long len, unsigned long pgoff,
			  unsigned long flags);

/*
 * hugetlb page specific state flags.  These flags are located in page.private
 * of the hugetlb head page.  Functions created via the below macros should be
 * used to manipulate these flags.
 *
 * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at
 *	allocation time.  Cleared when page is fully instantiated.  Free
 *	routine checks flag to restore a reservation on error paths.
 *	Synchronization:  Examined or modified by code that knows it has
 *	the only reference to page.  i.e. After allocation but before use
 *	or when the page is being freed.
 * HPG_migratable - Set after a newly allocated page is added to the page
 *	cache and/or page tables.  Indicates the page is a candidate for
 *	migration.
 *	Synchronization:  Initially set after new page allocation with no
 *	locking.  When examined and modified during migration processing
 *	(isolate, migrate, putback) the hugetlb_lock is held.
 * HPG_temporary - Set on a page that is temporarily allocated from the buddy
 *	allocator.  Typically used for migration target pages when no pages
 *	are available in the pool.  The hugetlb free page path will
 *	immediately free pages with this flag set to the buddy allocator.
 *	Synchronization:  Can be set after huge page allocation from buddy when
 *	code knows it has only reference.  All other examinations and
 *	modifications require hugetlb_lock.
 * HPG_freed - Set when page is on the free lists.
 *	Synchronization: hugetlb_lock held for examination and modification.
 * HPG_vmemmap_optimized - Set when the vmemmap pages of the page are freed.
 * HPG_raw_hwp_unreliable - Set when the hugetlb page has a hwpoison sub-page
 *	that is not tracked by raw_hwp_page list.
 */
enum hugetlb_page_flags {
	HPG_restore_reserve = 0,
	HPG_migratable,
	HPG_temporary,
	HPG_freed,
	HPG_vmemmap_optimized,
	HPG_raw_hwp_unreliable,
	HPG_cma,
	__NR_HPAGEFLAGS,
};

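/*
 * Illustrative usage (the folio variable is a placeholder): the HPAGEFLAG()
 * macros below expand to folio_{test,set,clear}_hugetlb_<flname>() helpers
 * operating on the bits above, e.g.:
 *
 *	folio_set_hugetlb_migratable(folio);
 *	if (folio_test_hugetlb_temporary(folio))
 *		... folio came straight from the buddy allocator ...
 */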
/*
 * Macros to create test, set and clear function definitions for
 * hugetlb specific page flags.
 */
#ifdef CONFIG_HUGETLB_PAGE
#define TESTHPAGEFLAG(uname, flname)				\
static __always_inline						\
bool folio_test_hugetlb_##flname(struct folio *folio)		\
	{	void *private = &folio->private;		\
		return test_bit(HPG_##flname, private);	\
	}

#define SETHPAGEFLAG(uname, flname)				\
static __always_inline						\
void folio_set_hugetlb_##flname(struct folio *folio)		\
	{	void *private = &folio->private;		\
		set_bit(HPG_##flname, private);			\
	}

#define CLEARHPAGEFLAG(uname, flname)				\
static __always_inline						\
void folio_clear_hugetlb_##flname(struct folio *folio)		\
	{	void *private = &folio->private;		\
		clear_bit(HPG_##flname, private);		\
	}
#else
#define TESTHPAGEFLAG(uname, flname)				\
static inline bool						\
folio_test_hugetlb_##flname(struct folio *folio)		\
	{ return 0; }

#define SETHPAGEFLAG(uname, flname)				\
static inline void						\
folio_set_hugetlb_##flname(struct folio *folio)			\
	{ }

#define CLEARHPAGEFLAG(uname, flname)				\
static inline void						\
folio_clear_hugetlb_##flname(struct folio *folio)		\
	{ }
#endif

#define HPAGEFLAG(uname, flname)				\
	TESTHPAGEFLAG(uname, flname)				\
	SETHPAGEFLAG(uname, flname)				\
	CLEARHPAGEFLAG(uname, flname)				\

/*
 * Create functions associated with hugetlb page flags
 */
HPAGEFLAG(RestoreReserve, restore_reserve)
HPAGEFLAG(Migratable, migratable)
HPAGEFLAG(Temporary, temporary)
HPAGEFLAG(Freed, freed)
HPAGEFLAG(VmemmapOptimized, vmemmap_optimized)
HPAGEFLAG(RawHwpUnreliable, raw_hwp_unreliable)
HPAGEFLAG(Cma, cma)

#ifdef CONFIG_HUGETLB_PAGE

#define HSTATE_NAME_LEN 32
/* Defines one hugetlb page size */
struct hstate {
	struct mutex resize_lock;
	struct lock_class_key resize_key;
	int next_nid_to_alloc;
	int next_nid_to_free;
	unsigned int order;
	unsigned int demote_order;
	unsigned long mask;
	unsigned long max_huge_pages;
	unsigned long nr_huge_pages;
	unsigned long free_huge_pages;
	unsigned long resv_huge_pages;
	unsigned long surplus_huge_pages;
	unsigned long nr_overcommit_huge_pages;
	struct list_head hugepage_activelist;
	struct list_head hugepage_freelists[MAX_NUMNODES];
	unsigned int max_huge_pages_node[MAX_NUMNODES];
	unsigned int nr_huge_pages_node[MAX_NUMNODES];
	unsigned int free_huge_pages_node[MAX_NUMNODES];
	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
	char name[HSTATE_NAME_LEN];
};

struct cma;

struct huge_bootmem_page {
	struct list_head list;
	struct hstate *hstate;
	unsigned long flags;
	struct cma *cma;
};

#define HUGE_BOOTMEM_HVO		0x0001
#define HUGE_BOOTMEM_ZONES_VALID	0x0002
#define HUGE_BOOTMEM_CMA		0x0004

bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);

int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list);
int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
void wait_for_freed_hugetlb_folios(void);
struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
				  unsigned long addr, bool cow_from_owner);
struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
				nodemask_t *nmask, gfp_t gfp_mask,
				bool allow_alloc_fallback);
struct folio *alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
				nodemask_t *nmask, gfp_t gfp_mask);

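/*
 * Illustrative sketch of a migration-style allocation (src_folio, nid, nmask,
 * gfp_mask and reason are placeholders for the caller's context): the hstate
 * comes from the source folio and the gfp mask is derived from the hstate:
 *
 *	struct hstate *h = folio_hstate(src_folio);
 *	gfp_t gfp = htlb_modify_alloc_mask(h, gfp_mask);
 *	struct folio *dst;
 *
 *	dst = alloc_hugetlb_folio_nodemask(h, nid, nmask, gfp,
 *					   htlb_allow_alloc_fallback(reason));
 */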
int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
			pgoff_t idx);
void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
				unsigned long address, struct folio *folio);

/* arch callback */
int __init __alloc_bootmem_huge_page(struct hstate *h, int nid);
int __init alloc_bootmem_huge_page(struct hstate *h, int nid);
bool __init hugetlb_node_alloc_supported(void);

void __init hugetlb_add_hstate(unsigned order);
bool __init arch_hugetlb_valid_size(unsigned long size);
struct hstate *size_to_hstate(unsigned long size);

#ifndef HUGE_MAX_HSTATE
#define HUGE_MAX_HSTATE 1
#endif

extern struct hstate hstates[HUGE_MAX_HSTATE];
extern unsigned int default_hstate_idx;

#define default_hstate (hstates[default_hstate_idx])

static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
{
	return HUGETLBFS_SB(inode->i_sb)->spool;
}

static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
	return folio->_hugetlb_subpool;
}

static inline void hugetlb_set_folio_subpool(struct folio *folio,
					struct hugepage_subpool *subpool)
{
	folio->_hugetlb_subpool = subpool;
}

static inline struct hstate *hstate_file(struct file *f)
{
	return hstate_inode(file_inode(f));
}

static inline struct hstate *hstate_sizelog(int page_size_log)
{
	if (!page_size_log)
		return &default_hstate;

	if (page_size_log < BITS_PER_LONG)
		return size_to_hstate(1UL << page_size_log);

	return NULL;
}

static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
	return hstate_file(vma->vm_file);
}

static inline unsigned long huge_page_size(const struct hstate *h)
{
	return (unsigned long)PAGE_SIZE << h->order;
}

extern unsigned long vma_kernel_pagesize(struct vm_area_struct *vma);

extern unsigned long vma_mmu_pagesize(struct vm_area_struct *vma);

static inline unsigned long huge_page_mask(struct hstate *h)
{
	return h->mask;
}

static inline unsigned int huge_page_order(struct hstate *h)
{
	return h->order;
}

static inline unsigned huge_page_shift(struct hstate *h)
{
	return h->order + PAGE_SHIFT;
}

static inline bool order_is_gigantic(unsigned int order)
{
	return order > MAX_PAGE_ORDER;
}

static inline bool hstate_is_gigantic(struct hstate *h)
{
	return order_is_gigantic(huge_page_order(h));
}

static inline unsigned int pages_per_huge_page(const struct hstate *h)
{
	return 1 << h->order;
}

static inline unsigned int blocks_per_huge_page(struct hstate *h)
{
	return huge_page_size(h) / 512;
}

static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
				struct address_space *mapping, pgoff_t idx)
{
	return filemap_lock_folio(mapping, idx << huge_page_order(h));
}

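/*
 * For example, a 2 MiB hstate with 4 KiB base pages has order 9, so
 * huge_page_shift() is 21, pages_per_huge_page() is 512, and
 * filemap_lock_hugetlb_folio() turns a huge-page index into the
 * PAGE_SIZE-based page cache index by shifting: idx 3 -> 3 << 9 == 1536.
 */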
#include <asm/hugetlb.h>

#ifndef is_hugepage_only_range
static inline int is_hugepage_only_range(struct mm_struct *mm,
					unsigned long addr, unsigned long len)
{
	return 0;
}
#define is_hugepage_only_range is_hugepage_only_range
#endif

#ifndef arch_clear_hugetlb_flags
static inline void arch_clear_hugetlb_flags(struct folio *folio) { }
#define arch_clear_hugetlb_flags arch_clear_hugetlb_flags
#endif

#ifndef arch_make_huge_pte
static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
				       vm_flags_t flags)
{
	return pte_mkhuge(entry);
}
#endif

#ifndef arch_has_huge_bootmem_alloc
/*
 * Some architectures do their own bootmem allocation, so they can't use
 * early CMA allocation.
 */
static inline bool arch_has_huge_bootmem_alloc(void)
{
	return false;
}
#endif

static inline struct hstate *folio_hstate(struct folio *folio)
{
	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
	return size_to_hstate(folio_size(folio));
}

static inline unsigned hstate_index_to_shift(unsigned index)
{
	return hstates[index].order + PAGE_SHIFT;
}

static inline int hstate_index(struct hstate *h)
{
	return h - hstates;
}

int dissolve_free_hugetlb_folio(struct folio *folio);
int dissolve_free_hugetlb_folios(unsigned long start_pfn,
				 unsigned long end_pfn);

#ifdef CONFIG_MEMORY_FAILURE
extern void folio_clear_hugetlb_hwpoison(struct folio *folio);
#else
static inline void folio_clear_hugetlb_hwpoison(struct folio *folio)
{
}
#endif

#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION
#ifndef arch_hugetlb_migration_supported
static inline bool arch_hugetlb_migration_supported(struct hstate *h)
{
	if ((huge_page_shift(h) == PMD_SHIFT) ||
	    (huge_page_shift(h) == PUD_SHIFT) ||
	    (huge_page_shift(h) == PGDIR_SHIFT))
		return true;
	else
		return false;
}
#endif
#else
static inline bool arch_hugetlb_migration_supported(struct hstate *h)
{
	return false;
}
#endif

static inline bool hugepage_migration_supported(struct hstate *h)
{
	return arch_hugetlb_migration_supported(h);
}

/*
 * The movability check is different from the migration check.
 * It determines whether or not a huge page should be placed in
 * the movable zone.  Movability of a huge page is required only
 * if its size is supported for migration: there is no reason for
 * a huge page to be movable if it is not migratable to start
 * with.  Also, the huge page should be small enough that moving
 * it out of the movable zone remains feasible; just being placed
 * in the movable zone does not make the migration feasible.
 *
 * So even though large huge page sizes like the gigantic ones
 * are migratable, they should not be movable because it's not
 * feasible to migrate them out of the movable zone.
 */
static inline bool hugepage_movable_supported(struct hstate *h)
{
	if (!hugepage_migration_supported(h))
		return false;

	if (hstate_is_gigantic(h))
		return false;
	return true;
}

/* Movability of hugepages depends on migration support. */
static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
	gfp_t gfp = __GFP_COMP | __GFP_NOWARN;

	gfp |= hugepage_movable_supported(h) ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;

	return gfp;
}

static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
{
	gfp_t modified_mask = htlb_alloc_mask(h);

	/* Some callers might want to enforce node */
	modified_mask |= (gfp_mask & __GFP_THISNODE);

	modified_mask |= (gfp_mask & __GFP_NOWARN);

	return modified_mask;
}

static inline bool htlb_allow_alloc_fallback(int reason)
{
	bool allowed_fallback = false;

	/*
	 * Note: the memory offline, memory failure and migration syscalls are
	 * allowed to fall back to other nodes for lack of a better choice,
	 * which might break the per-node hugetlb pool.
	 * Other cases set __GFP_THISNODE to avoid breaking the per-node
	 * hugetlb pool.
	 */
	switch (reason) {
	case MR_MEMORY_HOTPLUG:
	case MR_MEMORY_FAILURE:
	case MR_SYSCALL:
	case MR_MEMPOLICY_MBIND:
		allowed_fallback = true;
		break;
	default:
		break;
	}

	return allowed_fallback;
}

static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					struct mm_struct *mm, pte_t *pte)
{
	const unsigned long size = huge_page_size(h);

	VM_WARN_ON(size == PAGE_SIZE);

	/*
	 * hugetlb must use the exact same PT locks as core-mm page table
	 * walkers would.  When modifying a PTE table, hugetlb must take the
	 * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD
	 * PT lock etc.
	 *
	 * The expectation is that any hugetlb folio smaller than a PMD is
	 * always mapped into a single PTE table and that any hugetlb folio
	 * smaller than a PUD (but at least as big as a PMD) is always mapped
	 * into a single PMD table.
	 *
	 * If that does not hold for an architecture, then that architecture
	 * must disable split PT locks such that all *_lockptr() functions
	 * will give us the same result: the per-MM PT lock.
	 *
	 * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where
	 * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr()
	 * and core-mm would use pmd_lockptr(). However, in such configurations
	 * split PMD locks are disabled -- they don't make sense on a single
	 * PGDIR page table -- and the end result is the same.
	 */
	if (size >= PUD_SIZE)
		return pud_lockptr(mm, (pud_t *) pte);
	else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE))
		return pmd_lockptr(mm, (pmd_t *) pte);
	/* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */
	return ptep_lockptr(mm, pte);
}

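/*
 * A minimal usage sketch (h, mm and ptep are placeholders): callers normally
 * go through huge_pte_lock() near the end of this header, which pairs
 * huge_pte_lockptr() with spin_lock():
 *
 *	spinlock_t *ptl = huge_pte_lock(h, mm, ptep);
 *	... read or update the huge PTE ...
 *	spin_unlock(ptl);
 */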
#ifndef hugepages_supported
/*
 * Some platforms decide whether they support huge pages at boot
 * time.  Some of them, such as powerpc, set HPAGE_SHIFT to 0
 * when there is no such support.
 */
#define hugepages_supported() (HPAGE_SHIFT != 0)
#endif

void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm);

static inline void hugetlb_count_init(struct mm_struct *mm)
{
	atomic_long_set(&mm->hugetlb_usage, 0);
}

static inline void hugetlb_count_add(long l, struct mm_struct *mm)
{
	atomic_long_add(l, &mm->hugetlb_usage);
}

static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
	atomic_long_sub(l, &mm->hugetlb_usage);
}

#ifndef huge_ptep_modify_prot_start
#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
						unsigned long addr, pte_t *ptep)
{
	unsigned long psize = huge_page_size(hstate_vma(vma));

	return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep, psize);
}
#endif

#ifndef huge_ptep_modify_prot_commit
#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
						unsigned long addr, pte_t *ptep,
						pte_t old_pte, pte_t pte)
{
	unsigned long psize = huge_page_size(hstate_vma(vma));

	set_huge_pte_at(vma->vm_mm, addr, ptep, pte, psize);
}
#endif

#ifdef CONFIG_NUMA
void hugetlb_register_node(struct node *node);
void hugetlb_unregister_node(struct node *node);
#endif

/*
 * Check if a given raw @page in a hugepage is HWPOISON.
 */
bool is_raw_hwpoison_page_in_hugepage(struct page *page);

static inline unsigned long huge_page_mask_align(struct file *file)
{
	return PAGE_MASK & ~huge_page_mask(hstate_file(file));
}

#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};

static inline unsigned long huge_page_mask_align(struct file *file)
{
	return 0;
}

static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
{
	return NULL;
}

static inline struct folio *filemap_lock_hugetlb_folio(struct hstate *h,
				struct address_space *mapping, pgoff_t idx)
{
	return NULL;
}

static inline int isolate_or_dissolve_huge_folio(struct folio *folio,
						 struct list_head *list)
{
	return -ENOMEM;
}

static inline int replace_free_hugepage_folios(unsigned long start_pfn,
					       unsigned long end_pfn)
{
	return 0;
}

static inline void wait_for_freed_hugetlb_folios(void)
{
}

static inline struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
						unsigned long addr,
						bool cow_from_owner)
{
	return NULL;
}

static inline struct folio *
alloc_hugetlb_folio_reserve(struct hstate *h, int preferred_nid,
			    nodemask_t *nmask, gfp_t gfp_mask)
{
	return NULL;
}

static inline struct folio *
alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
			nodemask_t *nmask, gfp_t gfp_mask,
			bool allow_alloc_fallback)
{
	return NULL;
}

static inline int __alloc_bootmem_huge_page(struct hstate *h)
{
	return 0;
}

static inline struct hstate *hstate_file(struct file *f)
{
	return NULL;
}

static inline struct hstate *hstate_sizelog(int page_size_log)
{
	return NULL;
}

static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
{
	return NULL;
}

static inline struct hstate *folio_hstate(struct folio *folio)
{
	return NULL;
}

static inline struct hstate *size_to_hstate(unsigned long size)
{
	return NULL;
}

static inline unsigned long huge_page_size(struct hstate *h)
{
	return PAGE_SIZE;
}

static inline unsigned long huge_page_mask(struct hstate *h)
{
	return PAGE_MASK;
}

static inline unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
{
	return PAGE_SIZE;
}

static inline unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
	return PAGE_SIZE;
}

static inline unsigned int huge_page_order(struct hstate *h)
{
	return 0;
}

static inline unsigned int huge_page_shift(struct hstate *h)
{
	return PAGE_SHIFT;
}

static inline bool hstate_is_gigantic(struct hstate *h)
{
	return false;
}

static inline unsigned int pages_per_huge_page(struct hstate *h)
{
	return 1;
}

static inline unsigned hstate_index_to_shift(unsigned index)
{
	return 0;
}

static inline int hstate_index(struct hstate *h)
{
	return 0;
}

static inline int dissolve_free_hugetlb_folio(struct folio *folio)
{
	return 0;
}

static inline int dissolve_free_hugetlb_folios(unsigned long start_pfn,
					       unsigned long end_pfn)
{
	return 0;
}

static inline bool hugepage_migration_supported(struct hstate *h)
{
	return false;
}

static inline bool hugepage_movable_supported(struct hstate *h)
{
	return false;
}

static inline gfp_t htlb_alloc_mask(struct hstate *h)
{
	return 0;
}

static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
{
	return 0;
}

static inline bool htlb_allow_alloc_fallback(int reason)
{
	return false;
}

static inline spinlock_t *huge_pte_lockptr(struct hstate *h,
					   struct mm_struct *mm, pte_t *pte)
{
	return &mm->page_table_lock;
}

static inline void hugetlb_count_init(struct mm_struct *mm)
{
}

static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m)
{
}

static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
{
}

static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
					  unsigned long addr, pte_t *ptep)
{
#ifdef CONFIG_MMU
	return ptep_get(ptep);
#else
	return *ptep;
#endif
}

static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, pte_t pte, unsigned long sz)
{
}

static inline void hugetlb_register_node(struct node *node)
{
}

static inline void hugetlb_unregister_node(struct node *node)
{
}

static inline bool hugetlbfs_pagecache_present(
	struct hstate *h, struct vm_area_struct *vma, unsigned long address)
{
	return false;
}

static inline void hugetlb_bootmem_alloc(void)
{
}

static inline bool hugetlb_bootmem_allocated(void)
{
	return false;
}
#endif /* CONFIG_HUGETLB_PAGE */

static inline spinlock_t *huge_pte_lock(struct hstate *h,
					struct mm_struct *mm, pte_t *pte)
{
	spinlock_t *ptl;

	ptl = huge_pte_lockptr(h, mm, pte);
	spin_lock(ptl);
	return ptl;
}

#if defined(CONFIG_HUGETLB_PAGE) && defined(CONFIG_CMA)
extern void __init hugetlb_cma_reserve(int order);
#else
static inline __init void hugetlb_cma_reserve(int order)
{
}
#endif

#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
	return page_count(virt_to_page(pte)) > 1;
}
#else
static inline bool hugetlb_pmd_shared(pte_t *pte)
{
	return false;
}
#endif

bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);

#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
/*
 * ARCHes with special requirements for evicting HUGETLB backing TLB entries can
 * implement this.
 */
#define flush_hugetlb_tlb_range(vma, addr, end)	flush_tlb_range(vma, addr, end)
#endif

static inline bool __vma_shareable_lock(struct vm_area_struct *vma)
{
	return (vma->vm_flags & VM_MAYSHARE) && vma->vm_private_data;
}

bool __vma_private_lock(struct vm_area_struct *vma);

/*
 * Safe version of huge_pte_offset() to check the locks.  See comments
 * above huge_pte_offset().
 */
static inline pte_t *
hugetlb_walk(struct vm_area_struct *vma, unsigned long addr, unsigned long sz)
{
#if defined(CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING) && defined(CONFIG_LOCKDEP)
	struct hugetlb_vma_lock *vma_lock = vma->vm_private_data;

	/*
	 * If pmd sharing is possible, locking is needed to safely walk the
	 * hugetlb pgtables.  More information can be found in the comment
	 * above huge_pte_offset() in the same file.
	 *
	 * NOTE: lockdep_is_held() is only defined with CONFIG_LOCKDEP.
	 */
	if (__vma_shareable_lock(vma))
		WARN_ON_ONCE(!lockdep_is_held(&vma_lock->rw_sema) &&
			     !lockdep_is_held(
				 &vma->vm_file->f_mapping->i_mmap_rwsem));
#endif
	return huge_pte_offset(vma->vm_mm, addr, sz);
}

#endif /* _LINUX_HUGETLB_H */