Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm: hugetlb: simplify per-node sysfs creation and removal

Patch series "simplify handling of per-node sysfs creation and removal",
v4.


This patch (of 2):

The following commit offloaded per-node sysfs creation and removal to a
kworker but did not say why that is needed. It also said "I don't know
that this is absolutely required", so it seems the author was not sure
either. Since it only complicates the code, this patch reverts those
changes to simplify the code.

39da08cb074c ("hugetlb: offload per node attribute registrations")

We can use a memory hotplug notifier to do per-node sysfs creation and
removal instead of inserting those operations into node registration and
unregistration. This reduces the code coupling between node.c and
hugetlb.c and also simplifies the code.

Link: https://lkml.kernel.org/r/20220914072603.60293-1-songmuchun@bytedance.com
Link: https://lkml.kernel.org/r/20220914072603.60293-2-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Mike Kravetz <mike.kravetz@oracle.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Rafael J. Wysocki <rafael@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Muchun Song and committed by
Andrew Morton
b958d4d0 aaa31e05

+30 -168
+3 -136
drivers/base/node.c
··· 587 587 NULL 588 588 }; 589 589 590 - #ifdef CONFIG_HUGETLBFS 591 - /* 592 - * hugetlbfs per node attributes registration interface: 593 - * When/if hugetlb[fs] subsystem initializes [sometime after this module], 594 - * it will register its per node attributes for all online nodes with 595 - * memory. It will also call register_hugetlbfs_with_node(), below, to 596 - * register its attribute registration functions with this node driver. 597 - * Once these hooks have been initialized, the node driver will call into 598 - * the hugetlb module to [un]register attributes for hot-plugged nodes. 599 - */ 600 - static node_registration_func_t __hugetlb_register_node; 601 - static node_registration_func_t __hugetlb_unregister_node; 602 - 603 - static inline bool hugetlb_register_node(struct node *node) 604 - { 605 - if (__hugetlb_register_node && 606 - node_state(node->dev.id, N_MEMORY)) { 607 - __hugetlb_register_node(node); 608 - return true; 609 - } 610 - return false; 611 - } 612 - 613 - static inline void hugetlb_unregister_node(struct node *node) 614 - { 615 - if (__hugetlb_unregister_node) 616 - __hugetlb_unregister_node(node); 617 - } 618 - 619 - void register_hugetlbfs_with_node(node_registration_func_t doregister, 620 - node_registration_func_t unregister) 621 - { 622 - __hugetlb_register_node = doregister; 623 - __hugetlb_unregister_node = unregister; 624 - } 625 - #else 626 - static inline void hugetlb_register_node(struct node *node) {} 627 - 628 - static inline void hugetlb_unregister_node(struct node *node) {} 629 - #endif 630 - 631 590 static void node_device_release(struct device *dev) 632 591 { 633 - struct node *node = to_node(dev); 634 - 635 - #if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS) 636 - /* 637 - * We schedule the work only when a memory section is 638 - * onlined/offlined on this node. When we come here, 639 - * all the memory on this node has been offlined, 640 - * so we won't enqueue new work to this work. 
641 - * 642 - * The work is using node->node_work, so we should 643 - * flush work before freeing the memory. 644 - */ 645 - flush_work(&node->node_work); 646 - #endif 647 - kfree(node); 592 + kfree(to_node(dev)); 648 593 } 649 594 650 595 /* ··· 610 665 611 666 if (error) 612 667 put_device(&node->dev); 613 - else { 614 - hugetlb_register_node(node); 615 - 668 + else 616 669 compaction_register_node(node); 617 - } 670 + 618 671 return error; 619 672 } 620 673 ··· 626 683 void unregister_node(struct node *node) 627 684 { 628 685 compaction_unregister_node(node); 629 - hugetlb_unregister_node(node); /* no-op, if memoryless node */ 630 686 node_remove_accesses(node); 631 687 node_remove_caches(node); 632 688 device_unregister(&node->dev); ··· 847 905 (void *)&nid, func); 848 906 return; 849 907 } 850 - 851 - #ifdef CONFIG_HUGETLBFS 852 - /* 853 - * Handle per node hstate attribute [un]registration on transistions 854 - * to/from memoryless state. 855 - */ 856 - static void node_hugetlb_work(struct work_struct *work) 857 - { 858 - struct node *node = container_of(work, struct node, node_work); 859 - 860 - /* 861 - * We only get here when a node transitions to/from memoryless state. 862 - * We can detect which transition occurred by examining whether the 863 - * node has memory now. hugetlb_register_node() already check this 864 - * so we try to register the attributes. If that fails, then the 865 - * node has transitioned to memoryless, try to unregister the 866 - * attributes. 
867 - */ 868 - if (!hugetlb_register_node(node)) 869 - hugetlb_unregister_node(node); 870 - } 871 - 872 - static void init_node_hugetlb_work(int nid) 873 - { 874 - INIT_WORK(&node_devices[nid]->node_work, node_hugetlb_work); 875 - } 876 - 877 - static int node_memory_callback(struct notifier_block *self, 878 - unsigned long action, void *arg) 879 - { 880 - struct memory_notify *mnb = arg; 881 - int nid = mnb->status_change_nid; 882 - 883 - switch (action) { 884 - case MEM_ONLINE: 885 - case MEM_OFFLINE: 886 - /* 887 - * offload per node hstate [un]registration to a work thread 888 - * when transitioning to/from memoryless state. 889 - */ 890 - if (nid != NUMA_NO_NODE) 891 - schedule_work(&node_devices[nid]->node_work); 892 - break; 893 - 894 - case MEM_GOING_ONLINE: 895 - case MEM_GOING_OFFLINE: 896 - case MEM_CANCEL_ONLINE: 897 - case MEM_CANCEL_OFFLINE: 898 - default: 899 - break; 900 - } 901 - 902 - return NOTIFY_OK; 903 - } 904 - #endif /* CONFIG_HUGETLBFS */ 905 908 #endif /* CONFIG_MEMORY_HOTPLUG */ 906 - 907 - #if !defined(CONFIG_MEMORY_HOTPLUG) || !defined(CONFIG_HUGETLBFS) 908 - static inline int node_memory_callback(struct notifier_block *self, 909 - unsigned long action, void *arg) 910 - { 911 - return NOTIFY_OK; 912 - } 913 - 914 - static void init_node_hugetlb_work(int nid) { } 915 - 916 - #endif 917 909 918 910 int __register_one_node(int nid) 919 911 { ··· 867 991 } 868 992 869 993 INIT_LIST_HEAD(&node_devices[nid]->access_list); 870 - /* initialize work queue for memory hot plug */ 871 - init_node_hugetlb_work(nid); 872 994 node_init_caches(nid); 873 995 874 996 return error; ··· 937 1063 NULL, 938 1064 }; 939 1065 940 - #define NODE_CALLBACK_PRI 2 /* lower than SLAB */ 941 1066 void __init node_dev_init(void) 942 1067 { 943 - static struct notifier_block node_memory_callback_nb = { 944 - .notifier_call = node_memory_callback, 945 - .priority = NODE_CALLBACK_PRI, 946 - }; 947 1068 int ret, i; 948 1069 949 1070 
BUILD_BUG_ON(ARRAY_SIZE(node_state_attr) != NR_NODE_STATES); ··· 947 1078 ret = subsys_system_register(&node_subsys, cpu_root_attr_groups); 948 1079 if (ret) 949 1080 panic("%s() failed to register subsystem: %d\n", __func__, ret); 950 - 951 - register_hotmemory_notifier(&node_memory_callback_nb); 952 1081 953 1082 /* 954 1083 * Create all node devices, which will properly link the node
+4 -20
include/linux/node.h
··· 2 2 /* 3 3 * include/linux/node.h - generic node definition 4 4 * 5 - * This is mainly for topological representation. We define the 6 - * basic 'struct node' here, which can be embedded in per-arch 5 + * This is mainly for topological representation. We define the 6 + * basic 'struct node' here, which can be embedded in per-arch 7 7 * definitions of processors. 8 8 * 9 9 * Basic handling of the devices is done in drivers/base/node.c 10 - * and system devices are handled in drivers/base/sys.c. 10 + * and system devices are handled in drivers/base/sys.c. 11 11 * 12 12 * Nodes are exported via driverfs in the class/node/devices/ 13 - * directory. 13 + * directory. 14 14 */ 15 15 #ifndef _LINUX_NODE_H_ 16 16 #define _LINUX_NODE_H_ ··· 18 18 #include <linux/device.h> 19 19 #include <linux/cpumask.h> 20 20 #include <linux/list.h> 21 - #include <linux/workqueue.h> 22 21 23 22 /** 24 23 * struct node_hmem_attrs - heterogeneous memory performance attributes ··· 83 84 struct node { 84 85 struct device dev; 85 86 struct list_head access_list; 86 - 87 - #if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_HUGETLBFS) 88 - struct work_struct node_work; 89 - #endif 90 87 #ifdef CONFIG_HMEM_REPORTING 91 88 struct list_head cache_attrs; 92 89 struct device *cache_dev; ··· 91 96 92 97 struct memory_block; 93 98 extern struct node *node_devices[]; 94 - typedef void (*node_registration_func_t)(struct node *); 95 99 96 100 #if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_NUMA) 97 101 void register_memory_blocks_under_node(int nid, unsigned long start_pfn, ··· 138 144 extern int register_memory_node_under_compute_node(unsigned int mem_nid, 139 145 unsigned int cpu_nid, 140 146 unsigned access); 141 - 142 - #ifdef CONFIG_HUGETLBFS 143 - extern void register_hugetlbfs_with_node(node_registration_func_t doregister, 144 - node_registration_func_t unregister); 145 - #endif 146 147 #else 147 148 static inline void node_dev_init(void) 148 149 { ··· 163 174 return 0; 164 175 } 165 176 
static inline void unregister_memory_block_under_nodes(struct memory_block *mem_blk) 166 - { 167 - } 168 - 169 - static inline void register_hugetlbfs_with_node(node_registration_func_t reg, 170 - node_registration_func_t unreg) 171 177 { 172 178 } 173 179 #endif
+23 -12
mm/hugetlb.c
··· 33 33 #include <linux/migrate.h> 34 34 #include <linux/nospec.h> 35 35 #include <linux/delayacct.h> 36 + #include <linux/memory.h> 36 37 37 38 #include <asm/page.h> 38 39 #include <asm/pgalloc.h> ··· 4001 4000 } 4002 4001 } 4003 4002 4003 + static int __meminit hugetlb_memory_callback(struct notifier_block *self, 4004 + unsigned long action, void *arg) 4005 + { 4006 + struct memory_notify *mnb = arg; 4007 + int nid = mnb->status_change_nid; 4008 + 4009 + if (nid == NUMA_NO_NODE) 4010 + return NOTIFY_DONE; 4011 + 4012 + if (action == MEM_GOING_ONLINE) 4013 + hugetlb_register_node(node_devices[nid]); 4014 + else if (action == MEM_CANCEL_ONLINE || action == MEM_OFFLINE) 4015 + hugetlb_unregister_node(node_devices[nid]); 4016 + 4017 + return NOTIFY_OK; 4018 + } 4019 + 4004 4020 /* 4005 4021 * hugetlb init time: register hstate attributes for all registered node 4006 4022 * devices of nodes that have memory. All on-line nodes should have ··· 4027 4009 { 4028 4010 int nid; 4029 4011 4030 - for_each_node_state(nid, N_MEMORY) { 4031 - struct node *node = node_devices[nid]; 4032 - if (node->dev.id == nid) 4033 - hugetlb_register_node(node); 4034 - } 4035 - 4036 - /* 4037 - * Let the node device driver know we're here so it can 4038 - * [un]register hstate attributes on node hotplug. 4039 - */ 4040 - register_hugetlbfs_with_node(hugetlb_register_node, 4041 - hugetlb_unregister_node); 4012 + get_online_mems(); 4013 + hotplug_memory_notifier(hugetlb_memory_callback, 0); 4014 + for_each_node_state(nid, N_MEMORY) 4015 + hugetlb_register_node(node_devices[nid]); 4016 + put_online_mems(); 4042 4017 } 4043 4018 #else /* !CONFIG_NUMA */ 4044 4019