Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

mm/memory_hotplug: embed vmem_altmap details in memory block

With memmap on memory, some architectures need more details w.r.t. the altmap,
such as base_pfn, end_pfn, etc., to unmap vmemmap memory. Instead of
computing them again when we remove a memory block, embed the vmem_altmap
details in struct memory_block when the memmap-on-memory feature is in
use for that block.

[yangyingliang@huawei.com: fix error return code in add_memory_resource()]
Link: https://lkml.kernel.org/r/20230809081552.1351184-1-yangyingliang@huawei.com
Link: https://lkml.kernel.org/r/20230808091501.287660-7-aneesh.kumar@linux.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Aneesh Kumar K.V and committed by
Andrew Morton
1a8c64e1 603fd64d

+54 -37
+17 -10
drivers/base/memory.c
··· 105 105 static void memory_block_release(struct device *dev) 106 106 { 107 107 struct memory_block *mem = to_memory_block(dev); 108 - 108 + /* Verify that the altmap is freed */ 109 + WARN_ON(mem->altmap); 109 110 kfree(mem); 110 111 } 111 112 ··· 184 183 { 185 184 unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); 186 185 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 187 - unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages; 186 + unsigned long nr_vmemmap_pages = 0; 188 187 struct zone *zone; 189 188 int ret; 190 189 ··· 201 200 * stage helps to keep accounting easier to follow - e.g vmemmaps 202 201 * belong to the same zone as the memory they backed. 203 202 */ 203 + if (mem->altmap) 204 + nr_vmemmap_pages = mem->altmap->free; 205 + 204 206 if (nr_vmemmap_pages) { 205 207 ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone); 206 208 if (ret) ··· 234 230 { 235 231 unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); 236 232 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 237 - unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages; 233 + unsigned long nr_vmemmap_pages = 0; 238 234 int ret; 239 235 240 236 if (!mem->zone) ··· 244 240 * Unaccount before offlining, such that unpopulated zone and kthreads 245 241 * can properly be torn down in offline_pages(). 
246 242 */ 243 + if (mem->altmap) 244 + nr_vmemmap_pages = mem->altmap->free; 245 + 247 246 if (nr_vmemmap_pages) 248 247 adjust_present_page_count(pfn_to_page(start_pfn), mem->group, 249 248 -nr_vmemmap_pages); ··· 733 726 #endif 734 727 735 728 static int add_memory_block(unsigned long block_id, unsigned long state, 736 - unsigned long nr_vmemmap_pages, 729 + struct vmem_altmap *altmap, 737 730 struct memory_group *group) 738 731 { 739 732 struct memory_block *mem; ··· 751 744 mem->start_section_nr = block_id * sections_per_block; 752 745 mem->state = state; 753 746 mem->nid = NUMA_NO_NODE; 754 - mem->nr_vmemmap_pages = nr_vmemmap_pages; 747 + mem->altmap = altmap; 755 748 INIT_LIST_HEAD(&mem->group_next); 756 749 757 750 #ifndef CONFIG_NUMA ··· 790 783 if (section_count == 0) 791 784 return 0; 792 785 return add_memory_block(memory_block_id(base_section_nr), 793 - MEM_ONLINE, 0, NULL); 786 + MEM_ONLINE, NULL, NULL); 794 787 } 795 788 796 789 static int add_hotplug_memory_block(unsigned long block_id, 797 - unsigned long nr_vmemmap_pages, 790 + struct vmem_altmap *altmap, 798 791 struct memory_group *group) 799 792 { 800 - return add_memory_block(block_id, MEM_OFFLINE, nr_vmemmap_pages, group); 793 + return add_memory_block(block_id, MEM_OFFLINE, altmap, group); 801 794 } 802 795 803 796 static void remove_memory_block(struct memory_block *memory) ··· 825 818 * Called under device_hotplug_lock. 826 819 */ 827 820 int create_memory_block_devices(unsigned long start, unsigned long size, 828 - unsigned long vmemmap_pages, 821 + struct vmem_altmap *altmap, 829 822 struct memory_group *group) 830 823 { 831 824 const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); ··· 839 832 return -EINVAL; 840 833 841 834 for (block_id = start_block_id; block_id != end_block_id; block_id++) { 842 - ret = add_hotplug_memory_block(block_id, vmemmap_pages, group); 835 + ret = add_hotplug_memory_block(block_id, altmap, group); 843 836 if (ret) 844 837 break; 845 838 }
+2 -6
include/linux/memory.h
··· 77 77 */ 78 78 struct zone *zone; 79 79 struct device dev; 80 - /* 81 - * Number of vmemmap pages. These pages 82 - * lay at the beginning of the memory block. 83 - */ 84 - unsigned long nr_vmemmap_pages; 80 + struct vmem_altmap *altmap; 85 81 struct memory_group *group; /* group (if any) for this block */ 86 82 struct list_head group_next; /* next block inside memory group */ 87 83 #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG) ··· 143 147 extern int register_memory_notifier(struct notifier_block *nb); 144 148 extern void unregister_memory_notifier(struct notifier_block *nb); 145 149 int create_memory_block_devices(unsigned long start, unsigned long size, 146 - unsigned long vmemmap_pages, 150 + struct vmem_altmap *altmap, 147 151 struct memory_group *group); 148 152 void remove_memory_block_devices(unsigned long start, unsigned long size); 149 153 extern void memory_dev_init(void);
+35 -21
mm/memory_hotplug.c
··· 1439 1439 if (mhp_flags & MHP_MEMMAP_ON_MEMORY) { 1440 1440 if (mhp_supports_memmap_on_memory(size)) { 1441 1441 mhp_altmap.free = memory_block_memmap_on_memory_pages(); 1442 - params.altmap = &mhp_altmap; 1442 + params.altmap = kmalloc(sizeof(struct vmem_altmap), GFP_KERNEL); 1443 + if (!params.altmap) { 1444 + ret = -ENOMEM; 1445 + goto error; 1446 + } 1447 + 1448 + memcpy(params.altmap, &mhp_altmap, sizeof(mhp_altmap)); 1443 1449 } 1444 1450 /* fallback to not using altmap */ 1445 1451 } ··· 1453 1447 /* call arch's memory hotadd */ 1454 1448 ret = arch_add_memory(nid, start, size, &params); 1455 1449 if (ret < 0) 1456 - goto error; 1450 + goto error_free; 1457 1451 1458 1452 /* create memory block devices after memory was added */ 1459 - ret = create_memory_block_devices(start, size, mhp_altmap.free, group); 1453 + ret = create_memory_block_devices(start, size, params.altmap, group); 1460 1454 if (ret) { 1461 1455 arch_remove_memory(start, size, NULL); 1462 - goto error; 1456 + goto error_free; 1463 1457 } 1464 1458 1465 1459 if (new_node) { ··· 1496 1490 walk_memory_blocks(start, size, NULL, online_memory_block); 1497 1491 1498 1492 return ret; 1493 + error_free: 1494 + kfree(params.altmap); 1499 1495 error: 1500 1496 if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) 1501 1497 memblock_remove(start, size); ··· 2064 2056 return 0; 2065 2057 } 2066 2058 2067 - static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg) 2059 + static int test_has_altmap_cb(struct memory_block *mem, void *arg) 2068 2060 { 2061 + struct memory_block **mem_ptr = (struct memory_block **)arg; 2069 2062 /* 2070 - * If not set, continue with the next block. 2063 + * return the memblock if we have altmap 2064 + * and break callback. 
2071 2065 */ 2072 - return mem->nr_vmemmap_pages; 2066 + if (mem->altmap) { 2067 + *mem_ptr = mem; 2068 + return 1; 2069 + } 2070 + return 0; 2073 2071 } 2074 2072 2075 2073 static int check_cpu_on_node(int nid) ··· 2150 2136 2151 2137 static int __ref try_remove_memory(u64 start, u64 size) 2152 2138 { 2153 - struct vmem_altmap mhp_altmap = {}; 2154 - struct vmem_altmap *altmap = NULL; 2155 - unsigned long nr_vmemmap_pages; 2139 + struct memory_block *mem; 2156 2140 int rc = 0, nid = NUMA_NO_NODE; 2141 + struct vmem_altmap *altmap = NULL; 2157 2142 2158 2143 BUG_ON(check_hotplug_memory_range(start, size)); 2159 2144 ··· 2174 2161 * the same granularity it was added - a single memory block. 2175 2162 */ 2176 2163 if (mhp_memmap_on_memory()) { 2177 - nr_vmemmap_pages = walk_memory_blocks(start, size, NULL, 2178 - get_nr_vmemmap_pages_cb); 2179 - if (nr_vmemmap_pages) { 2164 + rc = walk_memory_blocks(start, size, &mem, test_has_altmap_cb); 2165 + if (rc) { 2180 2166 if (size != memory_block_size_bytes()) { 2181 2167 pr_warn("Refuse to remove %#llx - %#llx," 2182 2168 "wrong granularity\n", 2183 2169 start, start + size); 2184 2170 return -EINVAL; 2185 2171 } 2186 - 2172 + altmap = mem->altmap; 2187 2173 /* 2188 - * Let remove_pmd_table->free_hugepage_table do the 2189 - * right thing if we used vmem_altmap when hot-adding 2190 - * the range. 2174 + * Mark altmap NULL so that we can add a debug 2175 + * check on memblock free. 2191 2176 */ 2192 - mhp_altmap.base_pfn = PHYS_PFN(start); 2193 - mhp_altmap.free = nr_vmemmap_pages; 2194 - mhp_altmap.alloc = nr_vmemmap_pages; 2195 - altmap = &mhp_altmap; 2177 + mem->altmap = NULL; 2196 2178 } 2197 2179 } 2198 2180 ··· 2203 2195 mem_hotplug_begin(); 2204 2196 2205 2197 arch_remove_memory(start, size, altmap); 2198 + 2199 + /* Verify that all vmemmap pages have actually been freed. 
*/ 2200 + if (altmap) { 2201 + WARN(altmap->alloc, "Altmap not fully unmapped"); 2202 + kfree(altmap); 2203 + } 2206 2204 2207 2205 if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) { 2208 2206 memblock_phys_free(start, size);