Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sh: bootmem refactoring.

This reworks much of the bootmem setup and initialization code, allowing
us to get rid of duplicate work between the NUMA and non-NUMA cases. The
end result is a much more flexible, entirely LMB-backed interface for
supporting more complex topologies (fake NUMA, highmem, etc.). This is
an incremental step towards further NUMA work, as well as towards
gradually enabling migration off of bootmem entirely.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>

+169 -142
+2
arch/sh/include/asm/mmzone.h
··· 44 44 /* arch/sh/kernel/setup.c */ 45 45 void __init __add_active_range(unsigned int nid, unsigned long start_pfn, 46 46 unsigned long end_pfn); 47 + /* arch/sh/mm/init.c */ 48 + void __init allocate_pgdat(unsigned int nid); 47 49 48 50 #endif /* __KERNEL__ */ 49 51 #endif /* __ASM_SH_MMZONE_H */
+1
arch/sh/include/asm/setup.h
··· 19 19 #define COMMAND_LINE ((char *) (PARAM+0x100)) 20 20 21 21 void sh_mv_setup(void); 22 + void check_for_initrd(void); 22 23 23 24 #endif /* __KERNEL__ */ 24 25
+4 -139
arch/sh/kernel/setup.c
··· 114 114 } 115 115 early_param("mem", early_parse_mem); 116 116 117 - /* 118 - * Register fully available low RAM pages with the bootmem allocator. 119 - */ 120 - static void __init register_bootmem_low_pages(void) 121 - { 122 - unsigned long curr_pfn, last_pfn, pages; 123 - 124 - /* 125 - * We are rounding up the start address of usable memory: 126 - */ 127 - curr_pfn = PFN_UP(__MEMORY_START); 128 - 129 - /* 130 - * ... and at the end of the usable range downwards: 131 - */ 132 - last_pfn = PFN_DOWN(__pa(memory_end)); 133 - 134 - if (last_pfn > max_low_pfn) 135 - last_pfn = max_low_pfn; 136 - 137 - pages = last_pfn - curr_pfn; 138 - free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(pages)); 139 - } 140 - 141 - static void __init check_for_initrd(void) 117 + void __init check_for_initrd(void) 142 118 { 143 119 #ifdef CONFIG_BLK_DEV_INITRD 144 120 unsigned long start, end; ··· 216 240 add_active_range(nid, start_pfn, end_pfn); 217 241 } 218 242 219 - void __init do_init_bootmem(void) 220 - { 221 - unsigned long bootmap_size; 222 - unsigned long bootmap_pages, bootmem_paddr; 223 - u64 total_pages = lmb_phys_mem_size() >> PAGE_SHIFT; 224 - int i; 225 - 226 - bootmap_pages = bootmem_bootmap_pages(total_pages); 227 - 228 - bootmem_paddr = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); 229 - 230 - /* 231 - * Find a proper area for the bootmem bitmap. After this 232 - * bootstrap step all allocations (until the page allocator 233 - * is intact) must be done via bootmem_alloc(). 234 - */ 235 - bootmap_size = init_bootmem_node(NODE_DATA(0), 236 - bootmem_paddr >> PAGE_SHIFT, 237 - min_low_pfn, max_low_pfn); 238 - 239 - /* Add active regions with valid PFNs. 
*/ 240 - for (i = 0; i < lmb.memory.cnt; i++) { 241 - unsigned long start_pfn, end_pfn; 242 - start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; 243 - end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i); 244 - __add_active_range(0, start_pfn, end_pfn); 245 - } 246 - 247 - /* 248 - * Add all physical memory to the bootmem map and mark each 249 - * area as present. 250 - */ 251 - register_bootmem_low_pages(); 252 - 253 - /* Reserve the sections we're already using. */ 254 - for (i = 0; i < lmb.reserved.cnt; i++) 255 - reserve_bootmem(lmb.reserved.region[i].base, 256 - lmb_size_bytes(&lmb.reserved, i), 257 - BOOTMEM_DEFAULT); 258 - 259 - node_set_online(0); 260 - 261 - sparse_memory_present_with_active_regions(0); 262 - } 263 - 264 - static void __init early_reserve_mem(void) 265 - { 266 - unsigned long start_pfn; 267 - 268 - /* 269 - * Partially used pages are not usable - thus 270 - * we are rounding upwards: 271 - */ 272 - start_pfn = PFN_UP(__pa(_end)); 273 - 274 - /* 275 - * Reserve the kernel text and 276 - * Reserve the bootmem bitmap. We do this in two steps (first step 277 - * was init_bootmem()), because this catches the (definitely buggy) 278 - * case of us accidentally initializing the bootmem allocator with 279 - * an invalid RAM area. 280 - */ 281 - lmb_reserve(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET, 282 - (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) - 283 - (__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET)); 284 - 285 - /* 286 - * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET. 287 - */ 288 - if (CONFIG_ZERO_PAGE_OFFSET != 0) 289 - lmb_reserve(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET); 290 - 291 - /* 292 - * Handle additional early reservations 293 - */ 294 - check_for_initrd(); 295 - reserve_crashkernel(); 296 - } 297 - 298 243 /* 299 244 * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by 300 245 * is_kdump_kernel() to determine if we are booting after a panic. 
Hence ··· 236 339 #endif 237 340 238 341 void __init __weak plat_early_device_setup(void) 239 - { 240 - } 241 - 242 - void __init __weak plat_mem_setup(void) 243 342 { 244 343 } 245 344 ··· 294 401 295 402 plat_early_device_setup(); 296 403 404 + sh_mv_setup(); 405 + 297 406 /* Let earlyprintk output early console messages */ 298 407 early_platform_driver_probe("earlyprintk", 1, 1); 299 408 300 - lmb_init(); 301 - 302 - sh_mv_setup(); 303 - sh_mv.mv_mem_init(); 304 - 305 - early_reserve_mem(); 306 - 307 - lmb_enforce_memory_limit(memory_limit); 308 - lmb_analyze(); 309 - 310 - lmb_dump_all(); 311 - 312 - /* 313 - * Determine low and high memory ranges: 314 - */ 315 - max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; 316 - min_low_pfn = __MEMORY_START >> PAGE_SHIFT; 317 - 318 - nodes_clear(node_online_map); 319 - 320 - memory_start = (unsigned long)__va(__MEMORY_START); 321 - memory_end = memory_start + (memory_limit ?: lmb_phys_mem_size()); 322 - 323 - uncached_init(); 324 - pmb_init(); 325 - do_init_bootmem(); 326 - plat_mem_setup(); 327 - sparse_init(); 409 + paging_init(); 328 410 329 411 #ifdef CONFIG_DUMMY_CONSOLE 330 412 conswitchp = &dummy_con; 331 413 #endif 332 - paging_init(); 333 - 334 - ioremap_fixed_init(); 335 414 336 415 /* Perform the machine specific initialisation */ 337 416 if (likely(sh_mv.mv_setup))
+162 -3
arch/sh/mm/init.c
··· 17 17 #include <linux/percpu.h> 18 18 #include <linux/io.h> 19 19 #include <linux/lmb.h> 20 + #include <linux/kexec.h> 20 21 #include <linux/dma-mapping.h> 21 22 #include <asm/mmu_context.h> 23 + #include <asm/mmzone.h> 22 24 #include <asm/tlb.h> 23 25 #include <asm/cacheflush.h> 24 26 #include <asm/sections.h> 27 + #include <asm/setup.h> 25 28 #include <asm/cache.h> 26 29 #include <asm/sizes.h> 27 30 ··· 34 31 void __init generic_mem_init(void) 35 32 { 36 33 lmb_add(__MEMORY_START, __MEMORY_SIZE); 34 + } 35 + 36 + void __init __weak plat_mem_setup(void) 37 + { 38 + /* Nothing to see here, move along. */ 37 39 } 38 40 39 41 #ifdef CONFIG_MMU ··· 166 158 } 167 159 #endif /* CONFIG_MMU */ 168 160 169 - /* 170 - * paging_init() sets up the page tables 171 - */ 161 + void __init allocate_pgdat(unsigned int nid) 162 + { 163 + unsigned long start_pfn, end_pfn; 164 + #ifdef CONFIG_NEED_MULTIPLE_NODES 165 + unsigned long phys; 166 + #endif 167 + 168 + get_pfn_range_for_nid(nid, &start_pfn, &end_pfn); 169 + 170 + #ifdef CONFIG_NEED_MULTIPLE_NODES 171 + phys = __lmb_alloc_base(sizeof(struct pglist_data), 172 + SMP_CACHE_BYTES, end_pfn << PAGE_SHIFT); 173 + /* Retry with all of system memory */ 174 + if (!phys) 175 + phys = __lmb_alloc_base(sizeof(struct pglist_data), 176 + SMP_CACHE_BYTES, lmb_end_of_DRAM()); 177 + if (!phys) 178 + panic("Can't allocate pgdat for node %d\n", nid); 179 + 180 + NODE_DATA(nid) = __va(phys); 181 + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); 182 + 183 + NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; 184 + #endif 185 + 186 + NODE_DATA(nid)->node_start_pfn = start_pfn; 187 + NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; 188 + } 189 + 190 + static void __init bootmem_init_one_node(unsigned int nid) 191 + { 192 + unsigned long total_pages, paddr; 193 + unsigned long end_pfn; 194 + struct pglist_data *p; 195 + int i; 196 + 197 + p = NODE_DATA(nid); 198 + 199 + /* Nothing to do.. 
*/ 200 + if (!p->node_spanned_pages) 201 + return; 202 + 203 + end_pfn = p->node_start_pfn + p->node_spanned_pages; 204 + 205 + total_pages = bootmem_bootmap_pages(p->node_spanned_pages); 206 + 207 + paddr = lmb_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE); 208 + if (!paddr) 209 + panic("Can't allocate bootmap for nid[%d]\n", nid); 210 + 211 + init_bootmem_node(p, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn); 212 + 213 + free_bootmem_with_active_regions(nid, end_pfn); 214 + 215 + /* 216 + * XXX Handle initial reservations for the system memory node 217 + * only for the moment, we'll refactor this later for handling 218 + * reservations in other nodes. 219 + */ 220 + if (nid == 0) { 221 + /* Reserve the sections we're already using. */ 222 + for (i = 0; i < lmb.reserved.cnt; i++) 223 + reserve_bootmem(lmb.reserved.region[i].base, 224 + lmb_size_bytes(&lmb.reserved, i), 225 + BOOTMEM_DEFAULT); 226 + } 227 + 228 + sparse_memory_present_with_active_regions(nid); 229 + } 230 + 231 + static void __init do_init_bootmem(void) 232 + { 233 + int i; 234 + 235 + /* Add active regions with valid PFNs. */ 236 + for (i = 0; i < lmb.memory.cnt; i++) { 237 + unsigned long start_pfn, end_pfn; 238 + start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT; 239 + end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i); 240 + __add_active_range(0, start_pfn, end_pfn); 241 + } 242 + 243 + /* All of system RAM sits in node 0 for the non-NUMA case */ 244 + allocate_pgdat(0); 245 + node_set_online(0); 246 + 247 + plat_mem_setup(); 248 + 249 + for_each_online_node(i) 250 + bootmem_init_one_node(i); 251 + 252 + sparse_init(); 253 + } 254 + 255 + static void __init early_reserve_mem(void) 256 + { 257 + unsigned long start_pfn; 258 + 259 + /* 260 + * Partially used pages are not usable - thus 261 + * we are rounding upwards: 262 + */ 263 + start_pfn = PFN_UP(__pa(_end)); 264 + 265 + /* 266 + * Reserve the kernel text and Reserve the bootmem bitmap. 
We do 267 + * this in two steps (first step was init_bootmem()), because 268 + * this catches the (definitely buggy) case of us accidentally 269 + * initializing the bootmem allocator with an invalid RAM area. 270 + */ 271 + lmb_reserve(__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET, 272 + (PFN_PHYS(start_pfn) + PAGE_SIZE - 1) - 273 + (__MEMORY_START + CONFIG_ZERO_PAGE_OFFSET)); 274 + 275 + /* 276 + * Reserve physical pages below CONFIG_ZERO_PAGE_OFFSET. 277 + */ 278 + if (CONFIG_ZERO_PAGE_OFFSET != 0) 279 + lmb_reserve(__MEMORY_START, CONFIG_ZERO_PAGE_OFFSET); 280 + 281 + /* 282 + * Handle additional early reservations 283 + */ 284 + check_for_initrd(); 285 + reserve_crashkernel(); 286 + } 287 + 172 288 void __init paging_init(void) 173 289 { 174 290 unsigned long max_zone_pfns[MAX_NR_ZONES]; 175 291 unsigned long vaddr, end; 176 292 int nid; 293 + 294 + lmb_init(); 295 + 296 + sh_mv.mv_mem_init(); 297 + 298 + early_reserve_mem(); 299 + 300 + lmb_enforce_memory_limit(memory_limit); 301 + lmb_analyze(); 302 + 303 + lmb_dump_all(); 304 + 305 + /* 306 + * Determine low and high memory ranges: 307 + */ 308 + max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; 309 + min_low_pfn = __MEMORY_START >> PAGE_SHIFT; 310 + 311 + nodes_clear(node_online_map); 312 + 313 + memory_start = (unsigned long)__va(__MEMORY_START); 314 + memory_end = memory_start + (memory_limit ?: lmb_phys_mem_size()); 315 + 316 + uncached_init(); 317 + pmb_init(); 318 + do_init_bootmem(); 319 + ioremap_fixed_init(); 177 320 178 321 /* We don't need to map the kernel through the TLB, as 179 322 * it is permanatly mapped using P1. So clear the