Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: hugetlb_vmemmap: improve hugetlb_vmemmap code readability

There is a discussion about the name of hugetlb_vmemmap_alloc/free in
thread [1]. David suggested renaming "alloc/free" to "optimize/restore"
to make the functionalities clearer to users: "optimize" means the
function will optimize vmemmap pages, while "restore" means restoring
its vmemmap pages discarded before. This commit does this.

Another discussion addressed the confusion that RESERVE_VMEMMAP_NR is
not used explicitly for vmemmap_addr but implicitly for vmemmap_end in
hugetlb_vmemmap_alloc/free. David suggested we can compute what
hugetlb_vmemmap_init() does now at runtime. We do not need to worry
about the overhead of computing at runtime since the calculation is
simple enough and those functions are not in a hot path. This commit
has the following improvements:

1) The function suffixed name ("optimize/restore") is more expressive.
2) The logic becomes less weird in hugetlb_vmemmap_optimize/restore().
3) The hugetlb_vmemmap_init() does not need to be exported anymore.
4) A ->optimize_vmemmap_pages field in struct hstate is killed.
5) There is only one place that checks is_power_of_2(sizeof(struct
page)) instead of two places.
6) Add more comments for hugetlb_vmemmap_optimize/restore().
7) For external users, hugetlb_optimize_vmemmap_pages() was originally
used for detecting if the HugeTLB's vmemmap pages are optimizable.
In this commit, it is killed and we introduce a new helper
hugetlb_vmemmap_optimizable() to replace it. The name is more
expressive.

Link: https://lore.kernel.org/all/20220404074652.68024-2-songmuchun@bytedance.com/ [1]
Link: https://lkml.kernel.org/r/20220628092235.91270-7-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Will Deacon <will@kernel.org>
Cc: Xiongchun Duan <duanxiongchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Muchun Song and committed by
Andrew Morton
6213834c 30152245

+102 -108
+2 -5
include/linux/hugetlb.h
··· 638 638 unsigned int nr_huge_pages_node[MAX_NUMNODES]; 639 639 unsigned int free_huge_pages_node[MAX_NUMNODES]; 640 640 unsigned int surplus_huge_pages_node[MAX_NUMNODES]; 641 - #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP 642 - unsigned int optimize_vmemmap_pages; 643 - #endif 644 641 #ifdef CONFIG_CGROUP_HUGETLB 645 642 /* cgroup control files */ 646 643 struct cftype cgroup_files_dfl[8]; ··· 713 716 return hstate_file(vma->vm_file); 714 717 } 715 718 716 - static inline unsigned long huge_page_size(struct hstate *h) 719 + static inline unsigned long huge_page_size(const struct hstate *h) 717 720 { 718 721 return (unsigned long)PAGE_SIZE << h->order; 719 722 } ··· 742 745 return huge_page_order(h) >= MAX_ORDER; 743 746 } 744 747 745 - static inline unsigned int pages_per_huge_page(struct hstate *h) 748 + static inline unsigned int pages_per_huge_page(const struct hstate *h) 746 749 { 747 750 return 1 << h->order; 748 751 }
+4
include/linux/sysctl.h
··· 268 268 return NULL; 269 269 } 270 270 271 + static inline void register_sysctl_init(const char *path, struct ctl_table *table) 272 + { 273 + } 274 + 271 275 static inline struct ctl_table_header *register_sysctl_mount_point(const char *path) 272 276 { 273 277 return NULL;
+8 -7
mm/hugetlb.c
··· 1535 1535 if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) 1536 1536 return; 1537 1537 1538 - if (hugetlb_vmemmap_alloc(h, page)) { 1538 + if (hugetlb_vmemmap_restore(h, page)) { 1539 1539 spin_lock_irq(&hugetlb_lock); 1540 1540 /* 1541 1541 * If we cannot allocate vmemmap pages, just refuse to free the ··· 1612 1612 1613 1613 static inline void flush_free_hpage_work(struct hstate *h) 1614 1614 { 1615 - if (hugetlb_optimize_vmemmap_pages(h)) 1615 + if (hugetlb_vmemmap_optimizable(h)) 1616 1616 flush_work(&free_hpage_work); 1617 1617 } 1618 1618 ··· 1734 1734 1735 1735 static void __prep_new_huge_page(struct hstate *h, struct page *page) 1736 1736 { 1737 - hugetlb_vmemmap_free(h, page); 1737 + hugetlb_vmemmap_optimize(h, page); 1738 1738 INIT_LIST_HEAD(&page->lru); 1739 1739 set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); 1740 1740 hugetlb_set_page_subpool(page, NULL); ··· 2107 2107 * Attempt to allocate vmemmmap here so that we can take 2108 2108 * appropriate action on failure. 
2109 2109 */ 2110 - rc = hugetlb_vmemmap_alloc(h, head); 2110 + rc = hugetlb_vmemmap_restore(h, head); 2111 2111 if (!rc) { 2112 2112 /* 2113 2113 * Move PageHWPoison flag from head page to the raw ··· 3182 3182 char buf[32]; 3183 3183 3184 3184 string_get_size(huge_page_size(h), 1, STRING_UNITS_2, buf, 32); 3185 - pr_info("HugeTLB registered %s page size, pre-allocated %ld pages\n", 3185 + pr_info("HugeTLB: registered %s page size, pre-allocated %ld pages\n", 3186 3186 buf, h->free_huge_pages); 3187 + pr_info("HugeTLB: %d KiB vmemmap can be freed for a %s page\n", 3188 + hugetlb_vmemmap_optimizable_size(h) / SZ_1K, buf); 3187 3189 } 3188 3190 } 3189 3191 ··· 3423 3421 remove_hugetlb_page_for_demote(h, page, false); 3424 3422 spin_unlock_irq(&hugetlb_lock); 3425 3423 3426 - rc = hugetlb_vmemmap_alloc(h, page); 3424 + rc = hugetlb_vmemmap_restore(h, page); 3427 3425 if (rc) { 3428 3426 /* Allocation of vmemmmap failed, we can not demote page */ 3429 3427 spin_lock_irq(&hugetlb_lock); ··· 4113 4111 h->next_nid_to_free = first_memory_node; 4114 4112 snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB", 4115 4113 huge_page_size(h)/1024); 4116 - hugetlb_vmemmap_init(h); 4117 4114 4118 4115 parsed_hstate = h; 4119 4116 }
+59 -82
mm/hugetlb_vmemmap.c
··· 35 35 struct list_head *vmemmap_pages; 36 36 }; 37 37 38 - /* 39 - * There are a lot of struct page structures associated with each HugeTLB page. 40 - * For tail pages, the value of compound_head is the same. So we can reuse first 41 - * page of head page structures. We map the virtual addresses of all the pages 42 - * of tail page structures to the head page struct, and then free these page 43 - * frames. Therefore, we need to reserve one pages as vmemmap areas. 44 - */ 45 - #define RESERVE_VMEMMAP_NR 1U 46 - #define RESERVE_VMEMMAP_SIZE (RESERVE_VMEMMAP_NR << PAGE_SHIFT) 47 - 48 38 static int __split_vmemmap_huge_pmd(pmd_t *pmd, unsigned long start) 49 39 { 50 40 pmd_t __pmd; ··· 416 426 static bool vmemmap_optimize_enabled = IS_ENABLED(CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON); 417 427 core_param(hugetlb_free_vmemmap, vmemmap_optimize_enabled, bool, 0); 418 428 419 - /* 420 - * Previously discarded vmemmap pages will be allocated and remapping 421 - * after this function returns zero. 429 + /** 430 + * hugetlb_vmemmap_restore - restore previously optimized (by 431 + * hugetlb_vmemmap_optimize()) vmemmap pages which 432 + * will be reallocated and remapped. 433 + * @h: struct hstate. 434 + * @head: the head page whose vmemmap pages will be restored. 435 + * 436 + * Return: %0 if @head's vmemmap pages have been reallocated and remapped, 437 + * negative error code otherwise. 
422 438 */ 423 - int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head) 439 + int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head) 424 440 { 425 441 int ret; 426 - unsigned long vmemmap_addr = (unsigned long)head; 427 - unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages; 442 + unsigned long vmemmap_start = (unsigned long)head, vmemmap_end; 443 + unsigned long vmemmap_reuse; 428 444 429 445 if (!HPageVmemmapOptimized(head)) 430 446 return 0; 431 447 432 - vmemmap_addr += RESERVE_VMEMMAP_SIZE; 433 - vmemmap_pages = hugetlb_optimize_vmemmap_pages(h); 434 - vmemmap_end = vmemmap_addr + (vmemmap_pages << PAGE_SHIFT); 435 - vmemmap_reuse = vmemmap_addr - PAGE_SIZE; 448 + vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h); 449 + vmemmap_reuse = vmemmap_start; 450 + vmemmap_start += HUGETLB_VMEMMAP_RESERVE_SIZE; 436 451 437 452 /* 438 - * The pages which the vmemmap virtual address range [@vmemmap_addr, 453 + * The pages which the vmemmap virtual address range [@vmemmap_start, 439 454 * @vmemmap_end) are mapped to are freed to the buddy allocator, and 440 455 * the range is mapped to the page which @vmemmap_reuse is mapped to. 441 456 * When a HugeTLB page is freed to the buddy allocator, previously 442 457 * discarded vmemmap pages must be allocated and remapping. 443 458 */ 444 - ret = vmemmap_remap_alloc(vmemmap_addr, vmemmap_end, vmemmap_reuse, 459 + ret = vmemmap_remap_alloc(vmemmap_start, vmemmap_end, vmemmap_reuse, 445 460 GFP_KERNEL | __GFP_NORETRY | __GFP_THISNODE); 446 461 if (!ret) { 447 462 ClearHPageVmemmapOptimized(head); ··· 456 461 return ret; 457 462 } 458 463 459 - static unsigned int vmemmap_optimizable_pages(struct hstate *h, 460 - struct page *head) 464 + /* Return true iff a HugeTLB whose vmemmap should and can be optimized. 
*/ 465 + static bool vmemmap_should_optimize(const struct hstate *h, const struct page *head) 461 466 { 462 467 if (!READ_ONCE(vmemmap_optimize_enabled)) 463 - return 0; 468 + return false; 469 + 470 + if (!hugetlb_vmemmap_optimizable(h)) 471 + return false; 464 472 465 473 if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) { 466 474 pmd_t *pmdp, pmd; ··· 506 508 * +-------------------------------------------+ 507 509 */ 508 510 if (PageVmemmapSelfHosted(vmemmap_page)) 509 - return 0; 511 + return false; 510 512 } 511 513 512 - return hugetlb_optimize_vmemmap_pages(h); 514 + return true; 513 515 } 514 516 515 - void hugetlb_vmemmap_free(struct hstate *h, struct page *head) 517 + /** 518 + * hugetlb_vmemmap_optimize - optimize @head page's vmemmap pages. 519 + * @h: struct hstate. 520 + * @head: the head page whose vmemmap pages will be optimized. 521 + * 522 + * This function only tries to optimize @head's vmemmap pages and does not 523 + * guarantee that the optimization will succeed after it returns. The caller 524 + * can use HPageVmemmapOptimized(@head) to detect if @head's vmemmap pages 525 + * have been optimized. 
526 + */ 527 + void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head) 516 528 { 517 - unsigned long vmemmap_addr = (unsigned long)head; 518 - unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages; 529 + unsigned long vmemmap_start = (unsigned long)head, vmemmap_end; 530 + unsigned long vmemmap_reuse; 519 531 520 - vmemmap_pages = vmemmap_optimizable_pages(h, head); 521 - if (!vmemmap_pages) 532 + if (!vmemmap_should_optimize(h, head)) 522 533 return; 523 534 524 535 static_branch_inc(&hugetlb_optimize_vmemmap_key); 525 536 526 - vmemmap_addr += RESERVE_VMEMMAP_SIZE; 527 - vmemmap_end = vmemmap_addr + (vmemmap_pages << PAGE_SHIFT); 528 - vmemmap_reuse = vmemmap_addr - PAGE_SIZE; 537 + vmemmap_end = vmemmap_start + hugetlb_vmemmap_size(h); 538 + vmemmap_reuse = vmemmap_start; 539 + vmemmap_start += HUGETLB_VMEMMAP_RESERVE_SIZE; 529 540 530 541 /* 531 - * Remap the vmemmap virtual address range [@vmemmap_addr, @vmemmap_end) 542 + * Remap the vmemmap virtual address range [@vmemmap_start, @vmemmap_end) 532 543 * to the page which @vmemmap_reuse is mapped to, then free the pages 533 - * which the range [@vmemmap_addr, @vmemmap_end] is mapped to. 544 + * which the range [@vmemmap_start, @vmemmap_end] is mapped to. 534 545 */ 535 - if (vmemmap_remap_free(vmemmap_addr, vmemmap_end, vmemmap_reuse)) 546 + if (vmemmap_remap_free(vmemmap_start, vmemmap_end, vmemmap_reuse)) 536 547 static_branch_dec(&hugetlb_optimize_vmemmap_key); 537 548 else 538 549 SetHPageVmemmapOptimized(head); 539 550 } 540 551 541 - void __init hugetlb_vmemmap_init(struct hstate *h) 542 - { 543 - unsigned int nr_pages = pages_per_huge_page(h); 544 - unsigned int vmemmap_pages; 545 - 546 - /* 547 - * There are only (RESERVE_VMEMMAP_SIZE / sizeof(struct page)) struct 548 - * page structs that can be used when HVO is enabled, add a BUILD_BUG_ON 549 - * to catch invalid usage of the tail page structs. 
550 - */ 551 - BUILD_BUG_ON(__NR_USED_SUBPAGE >= 552 - RESERVE_VMEMMAP_SIZE / sizeof(struct page)); 553 - 554 - if (!is_power_of_2(sizeof(struct page))) { 555 - pr_warn_once("cannot optimize vmemmap pages because \"struct page\" crosses page boundaries\n"); 556 - return; 557 - } 558 - 559 - vmemmap_pages = (nr_pages * sizeof(struct page)) >> PAGE_SHIFT; 560 - /* 561 - * The head page is not to be freed to buddy allocator, the other tail 562 - * pages will map to the head page, so they can be freed. 563 - * 564 - * Could RESERVE_VMEMMAP_NR be greater than @vmemmap_pages? It is true 565 - * on some architectures (e.g. aarch64). See Documentation/arm64/ 566 - * hugetlbpage.rst for more details. 567 - */ 568 - if (likely(vmemmap_pages > RESERVE_VMEMMAP_NR)) 569 - h->optimize_vmemmap_pages = vmemmap_pages - RESERVE_VMEMMAP_NR; 570 - 571 - pr_info("can optimize %d vmemmap pages for %s\n", 572 - h->optimize_vmemmap_pages, h->name); 573 - } 574 - 575 - #ifdef CONFIG_PROC_SYSCTL 576 552 static struct ctl_table hugetlb_vmemmap_sysctls[] = { 577 553 { 578 554 .procname = "hugetlb_optimize_vmemmap", ··· 558 586 { } 559 587 }; 560 588 561 - static __init int hugetlb_vmemmap_sysctls_init(void) 589 + static int __init hugetlb_vmemmap_init(void) 562 590 { 563 - /* 564 - * If "struct page" crosses page boundaries, the vmemmap pages cannot 565 - * be optimized. 
566 - */ 567 - if (is_power_of_2(sizeof(struct page))) 568 - register_sysctl_init("vm", hugetlb_vmemmap_sysctls); 591 + /* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */ 592 + BUILD_BUG_ON(__NR_USED_SUBPAGE * sizeof(struct page) > HUGETLB_VMEMMAP_RESERVE_SIZE); 569 593 594 + if (IS_ENABLED(CONFIG_PROC_SYSCTL)) { 595 + const struct hstate *h; 596 + 597 + for_each_hstate(h) { 598 + if (hugetlb_vmemmap_optimizable(h)) { 599 + register_sysctl_init("vm", hugetlb_vmemmap_sysctls); 600 + break; 601 + } 602 + } 603 + } 570 604 return 0; 571 605 } 572 - late_initcall(hugetlb_vmemmap_sysctls_init); 573 - #endif /* CONFIG_PROC_SYSCTL */ 606 + late_initcall(hugetlb_vmemmap_init);
+29 -14
mm/hugetlb_vmemmap.h
··· 11 11 #include <linux/hugetlb.h> 12 12 13 13 #ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP 14 - int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head); 15 - void hugetlb_vmemmap_free(struct hstate *h, struct page *head); 16 - void hugetlb_vmemmap_init(struct hstate *h); 14 + int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head); 15 + void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head); 17 16 18 17 /* 19 - * How many vmemmap pages associated with a HugeTLB page that can be 20 - * optimized and freed to the buddy allocator. 18 + * Reserve one vmemmap page, all vmemmap addresses are mapped to it. See 19 + * Documentation/vm/vmemmap_dedup.rst. 21 20 */ 22 - static inline unsigned int hugetlb_optimize_vmemmap_pages(struct hstate *h) 21 + #define HUGETLB_VMEMMAP_RESERVE_SIZE PAGE_SIZE 22 + 23 + static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h) 23 24 { 24 - return h->optimize_vmemmap_pages; 25 + return pages_per_huge_page(h) * sizeof(struct page); 26 + } 27 + 28 + /* 29 + * Return how many vmemmap size associated with a HugeTLB page that can be 30 + * optimized and can be freed to the buddy allocator. 31 + */ 32 + static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h) 33 + { 34 + int size = hugetlb_vmemmap_size(h) - HUGETLB_VMEMMAP_RESERVE_SIZE; 35 + 36 + if (!is_power_of_2(sizeof(struct page))) 37 + return 0; 38 + return size > 0 ? 
size : 0; 25 39 } 26 40 #else 27 - static inline int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head) 41 + static inline int hugetlb_vmemmap_restore(const struct hstate *h, struct page *head) 28 42 { 29 43 return 0; 30 44 } 31 45 32 - static inline void hugetlb_vmemmap_free(struct hstate *h, struct page *head) 46 + static inline void hugetlb_vmemmap_optimize(const struct hstate *h, struct page *head) 33 47 { 34 48 } 35 49 36 - static inline void hugetlb_vmemmap_init(struct hstate *h) 37 - { 38 - } 39 - 40 - static inline unsigned int hugetlb_optimize_vmemmap_pages(struct hstate *h) 50 + static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h) 41 51 { 42 52 return 0; 43 53 } 44 54 #endif /* CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP */ 55 + 56 + static inline bool hugetlb_vmemmap_optimizable(const struct hstate *h) 57 + { 58 + return hugetlb_vmemmap_optimizable_size(h) != 0; 59 + } 45 60 #endif /* _LINUX_HUGETLB_VMEMMAP_H */