Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm/hugetlb: add pre-HVO framework

Define flags for pre-HVOed bootmem hugetlb pages, and act on them.

The most important flag is the HVO flag, signalling that a bootmem-
allocated gigantic page has already been HVO-ed. If this flag is seen by
the hugetlb bootmem gather code, the page is marked as HVO optimized. The
HVO code will then not try to optimize it again. Instead, it will just
map the tail page mirror pages read-only, completing the HVO steps.

No functional change, as nothing sets the flags yet.

Link: https://lkml.kernel.org/r/20250228182928.2645936-18-fvdl@google.com
Signed-off-by: Frank van der Linden <fvdl@google.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Carpenter <dan.carpenter@linaro.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin (Cruise) <roman.gushchin@linux.dev>
Cc: Usama Arif <usamaarif642@gmail.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

Authored by Frank van der Linden and committed by Andrew Morton
752fe17a 91ec7187

+83 -3
+1
arch/powerpc/mm/hugetlbpage.c
··· 113 113 gpage_freearray[nr_gpages] = 0; 114 114 list_add(&m->list, &huge_boot_pages[0]); 115 115 m->hstate = hstate; 116 + m->flags = 0; 116 117 return 1; 117 118 } 118 119
+4
include/linux/hugetlb.h
··· 681 681 struct huge_bootmem_page { 682 682 struct list_head list; 683 683 struct hstate *hstate; 684 + unsigned long flags; 684 685 }; 686 + 687 + #define HUGE_BOOTMEM_HVO 0x0001 688 + #define HUGE_BOOTMEM_ZONES_VALID 0x0002 685 689 686 690 int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list); 687 691 int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
+23 -1
mm/hugetlb.c
··· 3227 3227 INIT_LIST_HEAD(&m->list); 3228 3228 list_add(&m->list, &huge_boot_pages[node]); 3229 3229 m->hstate = h; 3230 + m->flags = 0; 3230 3231 return 1; 3231 3232 } 3232 3233 ··· 3295 3294 struct folio *folio, *tmp_f; 3296 3295 3297 3296 /* Send list for bulk vmemmap optimization processing */ 3298 - hugetlb_vmemmap_optimize_folios(h, folio_list); 3297 + hugetlb_vmemmap_optimize_bootmem_folios(h, folio_list); 3299 3298 3300 3299 list_for_each_entry_safe(folio, tmp_f, folio_list, lru) { 3301 3300 if (!folio_test_hugetlb_vmemmap_optimized(folio)) { ··· 3323 3322 { 3324 3323 unsigned long start_pfn; 3325 3324 bool valid; 3325 + 3326 + if (m->flags & HUGE_BOOTMEM_ZONES_VALID) { 3327 + /* 3328 + * Already validated, skip check. 3329 + */ 3330 + return true; 3331 + } 3326 3332 3327 3333 start_pfn = virt_to_phys(m) >> PAGE_SHIFT; 3328 3334 ··· 3361 3353 free_reserved_page(page); 3362 3354 page++; 3363 3355 } 3356 + } 3357 + 3358 + static bool __init hugetlb_bootmem_page_prehvo(struct huge_bootmem_page *m) 3359 + { 3360 + return (m->flags & HUGE_BOOTMEM_HVO); 3364 3361 } 3365 3362 3366 3363 /* ··· 3408 3395 hugetlb_folio_init_vmemmap(folio, h, 3409 3396 HUGETLB_VMEMMAP_RESERVE_PAGES); 3410 3397 init_new_hugetlb_folio(h, folio); 3398 + 3399 + if (hugetlb_bootmem_page_prehvo(m)) 3400 + /* 3401 + * If pre-HVO was done, just set the 3402 + * flag, the HVO code will then skip 3403 + * this folio. 3404 + */ 3405 + folio_set_hugetlb_vmemmap_optimized(folio); 3406 + 3411 3407 list_add(&folio->lru, &folio_list); 3412 3408 3413 3409 /*
+48 -2
mm/hugetlb_vmemmap.c
··· 649 649 return vmemmap_remap_split(vmemmap_start, vmemmap_end, vmemmap_reuse); 650 650 } 651 651 652 - void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list) 652 + static void __hugetlb_vmemmap_optimize_folios(struct hstate *h, 653 + struct list_head *folio_list, 654 + bool boot) 653 655 { 654 656 struct folio *folio; 657 + int nr_to_optimize; 655 658 LIST_HEAD(vmemmap_pages); 656 659 unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU; 657 660 661 + nr_to_optimize = 0; 658 662 list_for_each_entry(folio, folio_list, lru) { 659 - int ret = hugetlb_vmemmap_split_folio(h, folio); 663 + int ret; 664 + unsigned long spfn, epfn; 665 + 666 + if (boot && folio_test_hugetlb_vmemmap_optimized(folio)) { 667 + /* 668 + * Already optimized by pre-HVO, just map the 669 + * mirrored tail page structs RO. 670 + */ 671 + spfn = (unsigned long)&folio->page; 672 + epfn = spfn + pages_per_huge_page(h); 673 + vmemmap_wrprotect_hvo(spfn, epfn, folio_nid(folio), 674 + HUGETLB_VMEMMAP_RESERVE_SIZE); 675 + register_page_bootmem_memmap(pfn_to_section_nr(spfn), 676 + &folio->page, 677 + HUGETLB_VMEMMAP_RESERVE_SIZE); 678 + static_branch_inc(&hugetlb_optimize_vmemmap_key); 679 + continue; 680 + } 681 + 682 + nr_to_optimize++; 683 + 684 + ret = hugetlb_vmemmap_split_folio(h, folio); 660 685 661 686 /* 662 687 * Spliting the PMD requires allocating a page, thus lets fail ··· 692 667 if (ret == -ENOMEM) 693 668 break; 694 669 } 670 + 671 + if (!nr_to_optimize) 672 + /* 673 + * All pre-HVO folios, nothing left to do. It's ok if 674 + * there is a mix of pre-HVO and not yet HVO-ed folios 675 + * here, as __hugetlb_vmemmap_optimize_folio() will 676 + * skip any folios that already have the optimized flag 677 + * set, see vmemmap_should_optimize_folio(). 
678 + */ 679 + goto out; 695 680 696 681 flush_tlb_all(); 697 682 ··· 728 693 } 729 694 } 730 695 696 + out: 731 697 flush_tlb_all(); 732 698 free_vmemmap_page_list(&vmemmap_pages); 699 + } 700 + 701 + void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list) 702 + { 703 + __hugetlb_vmemmap_optimize_folios(h, folio_list, false); 704 + } 705 + 706 + void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list) 707 + { 708 + __hugetlb_vmemmap_optimize_folios(h, folio_list, true); 733 709 } 734 710 735 711 static const struct ctl_table hugetlb_vmemmap_sysctls[] = {
+7
mm/hugetlb_vmemmap.h
··· 24 24 struct list_head *non_hvo_folios); 25 25 void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio); 26 26 void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list); 27 + void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list); 28 + 27 29 28 30 static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h) 29 31 { ··· 63 61 } 64 62 65 63 static inline void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list) 64 + { 65 + } 66 + 67 + static inline void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, 68 + struct list_head *folio_list) 66 69 { 67 70 } 68 71