Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

arch, mm: introduce arch_mm_preinit

Currently, implementation of mem_init() in every architecture consists of
one or more of the following:

* initializations that must run before page allocator is active, for
instance swiotlb_init()
* a call to memblock_free_all() to release all the memory to the buddy
allocator
* initializations that must run after page allocator is ready and there is
no arch-specific hook other than mem_init() for that, like for example
register_page_bootmem_info() in x86 and sparc64 or simple setting of
mem_init_done = 1 in several architectures
* a bunch of semi-related stuff that apparently had no better place to
live, for example a ton of BUILD_BUG_ON()s in parisc.

Introduce arch_mm_preinit() that will be the first thing called from
mm_core_init(). On architectures that have initializations that must happen
before the page allocator is ready, move those into arch_mm_preinit() along
with the code that does not depend on ordering with page allocator setup.

On several architectures this results in reduction of mem_init() to a
single call to memblock_free_all() that allows its consolidation next.

Link: https://lkml.kernel.org/r/20250313135003.836600-13-rppt@kernel.org
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com> [x86]
Tested-by: Mark Brown <broonie@kernel.org>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andreas Larsson <andreas@gaisler.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Guo Ren (csky) <guoren@kernel.org>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Helge Deller <deller@gmx.de>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Mike Rapoport (Microsoft) and committed by
Andrew Morton
0d98484e 6faea342

+77 -40
+6 -7
arch/arc/mm/init.c
··· 157 157 free_area_init(max_zone_pfn); 158 158 } 159 159 160 - static void __init highmem_init(void) 160 + void __init arch_mm_preinit(void) 161 161 { 162 162 #ifdef CONFIG_HIGHMEM 163 163 memblock_phys_free(high_mem_start, high_mem_sz); 164 164 #endif 165 + 166 + BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); 167 + BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE); 168 + BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE); 169 + BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); 165 170 } 166 171 167 172 /* ··· 177 172 */ 178 173 void __init mem_init(void) 179 174 { 180 - highmem_init(); 181 175 memblock_free_all(); 182 - 183 - BUILD_BUG_ON((PTRS_PER_PGD * sizeof(pgd_t)) > PAGE_SIZE); 184 - BUILD_BUG_ON((PTRS_PER_PUD * sizeof(pud_t)) > PAGE_SIZE); 185 - BUILD_BUG_ON((PTRS_PER_PMD * sizeof(pmd_t)) > PAGE_SIZE); 186 - BUILD_BUG_ON((PTRS_PER_PTE * sizeof(pte_t)) > PAGE_SIZE); 187 176 } 188 177 189 178 #ifdef CONFIG_HIGHMEM
+12 -9
arch/arm/mm/init.c
··· 237 237 *p++ = 0xe7fddef0; 238 238 } 239 239 240 - /* 241 - * mem_init() marks the free areas in the mem_map and tells us how much 242 - * memory is free. This is done after various parts of the system have 243 - * claimed their memory after the kernel image. 244 - */ 245 - void __init mem_init(void) 240 + void __init arch_mm_preinit(void) 246 241 { 247 242 #ifdef CONFIG_ARM_LPAE 248 243 swiotlb_init(max_pfn > arm_dma_pfn_limit, SWIOTLB_VERBOSE); ··· 247 252 /* now that our DMA memory is actually so designated, we can free it */ 248 253 memblock_phys_free(PHYS_OFFSET, __pa(swapper_pg_dir) - PHYS_OFFSET); 249 254 #endif 250 - 251 - /* this will put all unused low memory onto the freelists */ 252 - memblock_free_all(); 253 255 254 256 /* 255 257 * Check boundaries twice: Some fundamental inconsistencies can ··· 261 269 BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); 262 270 BUG_ON(PKMAP_BASE + LAST_PKMAP * PAGE_SIZE > PAGE_OFFSET); 263 271 #endif 272 + } 273 + 274 + /* 275 + * mem_init() marks the free areas in the mem_map and tells us how much 276 + * memory is free. This is done after various parts of the system have 277 + * claimed their memory after the kernel image. 278 + */ 279 + void __init mem_init(void) 280 + { 281 + /* this will put all unused low memory onto the freelists */ 282 + memblock_free_all(); 264 283 } 265 284 266 285 #ifdef CONFIG_STRICT_KERNEL_RWX
+12 -9
arch/arm64/mm/init.c
··· 357 357 memblock_dump_all(); 358 358 } 359 359 360 - /* 361 - * mem_init() marks the free areas in the mem_map and tells us how much memory 362 - * is free. This is done after various parts of the system have claimed their 363 - * memory after the kernel image. 364 - */ 365 - void __init mem_init(void) 360 + void __init arch_mm_preinit(void) 366 361 { 367 362 unsigned int flags = SWIOTLB_VERBOSE; 368 363 bool swiotlb = max_pfn > PFN_DOWN(arm64_dma_phys_limit); ··· 380 385 381 386 swiotlb_init(swiotlb, flags); 382 387 swiotlb_update_mem_attributes(); 383 - 384 - /* this will put all unused low memory onto the freelists */ 385 - memblock_free_all(); 386 388 387 389 /* 388 390 * Check boundaries twice: Some fundamental inconsistencies can be ··· 404 412 */ 405 413 sysctl_overcommit_memory = OVERCOMMIT_ALWAYS; 406 414 } 415 + } 416 + 417 + /* 418 + * mem_init() marks the free areas in the mem_map and tells us how much memory 419 + * is free. This is done after various parts of the system have claimed their 420 + * memory after the kernel image. 421 + */ 422 + void __init mem_init(void) 423 + { 424 + /* this will put all unused low memory onto the freelists */ 425 + memblock_free_all(); 407 426 } 408 427 409 428 void free_initmem(void)
+7 -4
arch/mips/mm/init.c
··· 425 425 static struct kcore_list kcore_kseg0; 426 426 #endif 427 427 428 - void __init mem_init(void) 428 + void __init arch_mm_preinit(void) 429 429 { 430 430 /* 431 431 * When PFN_PTE_SHIFT is greater than PAGE_SHIFT we won't have enough PTE ··· 435 435 436 436 maar_init(); 437 437 setup_zero_pages(); /* Setup zeroed pages. */ 438 - memblock_free_all(); 439 438 440 439 #ifdef CONFIG_64BIT 441 440 if ((unsigned long) &_text > (unsigned long) CKSEG0) ··· 445 446 #endif 446 447 } 447 448 #else /* CONFIG_NUMA */ 448 - void __init mem_init(void) 449 + void __init arch_mm_preinit(void) 449 450 { 450 451 setup_zero_pages(); /* This comes from node 0 */ 451 - memblock_free_all(); 452 452 } 453 453 #endif /* !CONFIG_NUMA */ 454 + 455 + void __init mem_init(void) 456 + { 457 + memblock_free_all(); 458 + } 454 459 455 460 void free_init_pages(const char *what, unsigned long begin, unsigned long end) 456 461 {
+6 -3
arch/powerpc/mm/mem.c
··· 273 273 mark_nonram_nosave(); 274 274 } 275 275 276 - void __init mem_init(void) 276 + void __init arch_mm_preinit(void) 277 277 { 278 278 /* 279 279 * book3s is limited to 16 page sizes due to encoding this in ··· 294 294 #endif 295 295 296 296 kasan_late_init(); 297 - 298 - memblock_free_all(); 299 297 300 298 #if defined(CONFIG_PPC_E500) && !defined(CONFIG_SMP) 301 299 /* ··· 325 327 MODULES_VADDR, MODULES_END); 326 328 #endif 327 329 #endif /* CONFIG_PPC32 */ 330 + } 331 + 332 + void __init mem_init(void) 333 + { 334 + memblock_free_all(); 328 335 } 329 336 330 337 void free_initmem(void)
+6 -2
arch/riscv/mm/init.c
··· 171 171 static void print_vm_layout(void) { } 172 172 #endif /* CONFIG_DEBUG_VM */ 173 173 174 - void __init mem_init(void) 174 + void __init arch_mm_preinit(void) 175 175 { 176 176 bool swiotlb = max_pfn > PFN_DOWN(dma32_phys_limit); 177 177 #ifdef CONFIG_FLATMEM ··· 192 192 } 193 193 194 194 swiotlb_init(swiotlb, SWIOTLB_VERBOSE); 195 - memblock_free_all(); 196 195 197 196 print_vm_layout(); 197 + } 198 + 199 + void __init mem_init(void) 200 + { 201 + memblock_free_all(); 198 202 } 199 203 200 204 /* Limit the memory size via mem. */
+4 -1
arch/s390/mm/init.c
··· 154 154 swiotlb_update_mem_attributes(); 155 155 } 156 156 157 - void __init mem_init(void) 157 + void __init arch_mm_preinit(void) 158 158 { 159 159 cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask); 160 160 cpumask_set_cpu(0, mm_cpumask(&init_mm)); ··· 163 163 kfence_split_mapping(); 164 164 165 165 setup_zero_pages(); /* Setup zeroed pages. */ 166 + } 166 167 168 + void __init mem_init(void) 169 + { 167 170 /* this will put all low memory onto the freelists */ 168 171 memblock_free_all(); 169 172 }
+4 -1
arch/sparc/mm/init_32.c
··· 232 232 } 233 233 } 234 234 235 - void __init mem_init(void) 235 + void __init arch_mm_preinit(void) 236 236 { 237 237 int i; 238 238 ··· 262 262 memset(sparc_valid_addr_bitmap, 0, i << 2); 263 263 264 264 taint_real_pages(); 265 + } 265 266 267 + void __init mem_init(void) 268 + { 266 269 memblock_free_all(); 267 270 } 268 271
+5 -2
arch/um/kernel/mem.c
··· 54 54 /* Used during early boot */ 55 55 static unsigned long brk_end; 56 56 57 - void __init mem_init(void) 57 + void __init arch_mm_preinit(void) 58 58 { 59 59 /* clear the zero-page */ 60 60 memset(empty_zero_page, 0, PAGE_SIZE); ··· 66 66 map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); 67 67 memblock_free((void *)brk_end, uml_reserved - brk_end); 68 68 uml_reserved = brk_end; 69 + max_pfn = max_low_pfn; 70 + } 69 71 72 + void __init mem_init(void) 73 + { 70 74 /* this will put all low memory onto the freelists */ 71 75 memblock_free_all(); 72 - max_pfn = max_low_pfn; 73 76 kmalloc_ok = 1; 74 77 } 75 78
+5 -1
arch/x86/mm/init_32.c
··· 691 691 panic("Linux doesn't support CPUs with broken WP."); 692 692 } 693 693 694 - void __init mem_init(void) 694 + void __init arch_mm_preinit(void) 695 695 { 696 696 pci_iommu_alloc(); 697 697 698 698 #ifdef CONFIG_FLATMEM 699 699 BUG_ON(!mem_map); 700 700 #endif 701 + } 702 + 703 + void __init mem_init(void) 704 + { 701 705 /* this will put all low memory onto the freelists */ 702 706 memblock_free_all(); 703 707
+4 -1
arch/x86/mm/init_64.c
··· 1348 1348 panic("Failed to pre-allocate %s pages for vmalloc area\n", lvl); 1349 1349 } 1350 1350 1351 - void __init mem_init(void) 1351 + void __init arch_mm_preinit(void) 1352 1352 { 1353 1353 pci_iommu_alloc(); 1354 + } 1354 1355 1356 + void __init mem_init(void) 1357 + { 1355 1358 /* clear_bss() already clear the empty_zero_page */ 1356 1359 1357 1360 /* this will put all memory onto the freelists */
+1
include/linux/mm.h
··· 43 43 44 44 extern int sysctl_page_lock_unfairness; 45 45 46 + void arch_mm_preinit(void); 46 47 void mm_core_init(void); 47 48 void init_mm_internals(void); 48 49
+5
mm/mm_init.c
··· 2734 2734 ); 2735 2735 } 2736 2736 2737 + void __init __weak arch_mm_preinit(void) 2738 + { 2739 + } 2740 + 2737 2741 /* 2738 2742 * Set up kernel memory allocators 2739 2743 */ 2740 2744 void __init mm_core_init(void) 2741 2745 { 2746 + arch_mm_preinit(); 2742 2747 hugetlb_bootmem_alloc(); 2743 2748 2744 2749 /* Initializations relying on SMP setup */