Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

bootmem: factor out the marking of a PFN range

Introduce new helpers that mark either a range residing completely on one
node, or a node-agnostic range that may also span node boundaries.

The free/reserve API functions will then directly use these helpers.

Note that the free/reserve semantics become more strict: while the prior
code took basically arbitrary range arguments and marked the PFNs that
happen to fall into that range, the new code requires node-specific ranges
to be completely on the node. The node-agnostic requests might span node
boundaries as long as the nodes are contiguous.

Passing ranges that do not satisfy these criteria is a bug.

[akpm@linux-foundation.org: fix printk warnings]
Signed-off-by: Johannes Weiner <hannes@saeurebad.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Johannes Weiner; committed by Linus Torvalds.
Commit e2bf3cae (parent d747fa4b)

+69 -119
mm/bootmem.c
··· 234 234 sidx + PFN_DOWN(bdata->node_boot_start), 235 235 eidx + PFN_DOWN(bdata->node_boot_start)); 236 236 237 + if (bdata->hint_idx > sidx) 238 + bdata->hint_idx = sidx; 239 + 237 240 for (idx = sidx; idx < eidx; idx++) 238 241 if (!test_and_clear_bit(idx, bdata->node_bootmem_map)) 239 242 BUG(); ··· 266 263 return 0; 267 264 } 268 265 269 - static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, 270 - unsigned long size) 266 + static int __init mark_bootmem_node(bootmem_data_t *bdata, 267 + unsigned long start, unsigned long end, 268 + int reserve, int flags) 271 269 { 272 270 unsigned long sidx, eidx; 273 - unsigned long i; 274 271 275 - BUG_ON(!size); 272 + bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n", 273 + bdata - bootmem_node_data, start, end, reserve, flags); 276 274 277 - /* out range */ 278 - if (addr + size < bdata->node_boot_start || 279 - PFN_DOWN(addr) > bdata->node_low_pfn) 280 - return; 281 - /* 282 - * round down end of usable mem, partially free pages are 283 - * considered reserved. 284 - */ 275 + BUG_ON(start < PFN_DOWN(bdata->node_boot_start)); 276 + BUG_ON(end > bdata->node_low_pfn); 285 277 286 - if (addr >= bdata->node_boot_start && 287 - PFN_DOWN(addr - bdata->node_boot_start) < bdata->hint_idx) 288 - bdata->hint_idx = PFN_DOWN(addr - bdata->node_boot_start); 278 + sidx = start - PFN_DOWN(bdata->node_boot_start); 279 + eidx = end - PFN_DOWN(bdata->node_boot_start); 289 280 290 - /* 291 - * Round up to index to the range. 
292 - */ 293 - if (PFN_UP(addr) > PFN_DOWN(bdata->node_boot_start)) 294 - sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start); 281 + if (reserve) 282 + return __reserve(bdata, sidx, eidx, flags); 295 283 else 296 - sidx = 0; 284 + __free(bdata, sidx, eidx); 285 + return 0; 286 + } 297 287 298 - eidx = PFN_DOWN(addr + size - bdata->node_boot_start); 299 - if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) 300 - eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); 288 + static int __init mark_bootmem(unsigned long start, unsigned long end, 289 + int reserve, int flags) 290 + { 291 + unsigned long pos; 292 + bootmem_data_t *bdata; 301 293 302 - __free(bdata, sidx, eidx); 294 + pos = start; 295 + list_for_each_entry(bdata, &bdata_list, list) { 296 + int err; 297 + unsigned long max; 298 + 299 + if (pos < PFN_DOWN(bdata->node_boot_start)) { 300 + BUG_ON(pos != start); 301 + continue; 302 + } 303 + 304 + max = min(bdata->node_low_pfn, end); 305 + 306 + err = mark_bootmem_node(bdata, pos, max, reserve, flags); 307 + if (reserve && err) { 308 + mark_bootmem(start, pos, 0, 0); 309 + return err; 310 + } 311 + 312 + if (max == end) 313 + return 0; 314 + pos = bdata->node_low_pfn; 315 + } 316 + BUG(); 303 317 } 304 318 305 319 /** ··· 327 307 * 328 308 * Partial pages will be considered reserved and left as they are. 329 309 * 330 - * Only physical pages that actually reside on @pgdat are marked. 310 + * The range must reside completely on the specified node. 331 311 */ 332 312 void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 333 313 unsigned long size) 334 314 { 335 - free_bootmem_core(pgdat->bdata, physaddr, size); 315 + unsigned long start, end; 316 + 317 + start = PFN_UP(physaddr); 318 + end = PFN_DOWN(physaddr + size); 319 + 320 + mark_bootmem_node(pgdat->bdata, start, end, 0, 0); 336 321 } 337 322 338 323 /** ··· 347 322 * 348 323 * Partial pages will be considered reserved and left as they are. 
349 324 * 350 - * All physical pages within the range are marked, no matter what 351 - * node they reside on. 325 + * The range must be contiguous but may span node boundaries. 352 326 */ 353 327 void __init free_bootmem(unsigned long addr, unsigned long size) 354 328 { 355 - bootmem_data_t *bdata; 356 - list_for_each_entry(bdata, &bdata_list, list) 357 - free_bootmem_core(bdata, addr, size); 358 - } 329 + unsigned long start, end; 359 330 360 - /* 361 - * Marks a particular physical memory range as unallocatable. Usable RAM 362 - * might be used for boot-time allocations - or it might get added 363 - * to the free page pool later on. 364 - */ 365 - static int __init can_reserve_bootmem_core(bootmem_data_t *bdata, 366 - unsigned long addr, unsigned long size, int flags) 367 - { 368 - unsigned long sidx, eidx; 369 - unsigned long i; 331 + start = PFN_UP(addr); 332 + end = PFN_DOWN(addr + size); 370 333 371 - BUG_ON(!size); 372 - 373 - /* out of range, don't hold other */ 374 - if (addr + size < bdata->node_boot_start || 375 - PFN_DOWN(addr) > bdata->node_low_pfn) 376 - return 0; 377 - 378 - /* 379 - * Round up to index to the range. 
380 - */ 381 - if (addr > bdata->node_boot_start) 382 - sidx= PFN_DOWN(addr - bdata->node_boot_start); 383 - else 384 - sidx = 0; 385 - 386 - eidx = PFN_UP(addr + size - bdata->node_boot_start); 387 - if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) 388 - eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); 389 - 390 - for (i = sidx; i < eidx; i++) { 391 - if (test_bit(i, bdata->node_bootmem_map)) { 392 - if (flags & BOOTMEM_EXCLUSIVE) 393 - return -EBUSY; 394 - } 395 - } 396 - 397 - return 0; 398 - 399 - } 400 - 401 - static void __init reserve_bootmem_core(bootmem_data_t *bdata, 402 - unsigned long addr, unsigned long size, int flags) 403 - { 404 - unsigned long sidx, eidx; 405 - unsigned long i; 406 - 407 - BUG_ON(!size); 408 - 409 - /* out of range */ 410 - if (addr + size < bdata->node_boot_start || 411 - PFN_DOWN(addr) > bdata->node_low_pfn) 412 - return; 413 - 414 - /* 415 - * Round up to index to the range. 416 - */ 417 - if (addr > bdata->node_boot_start) 418 - sidx= PFN_DOWN(addr - bdata->node_boot_start); 419 - else 420 - sidx = 0; 421 - 422 - eidx = PFN_UP(addr + size - bdata->node_boot_start); 423 - if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) 424 - eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); 425 - 426 - return __reserve(bdata, sidx, eidx, flags); 334 + mark_bootmem(start, end, 0, 0); 427 335 } 428 336 429 337 /** ··· 368 410 * 369 411 * Partial pages will be reserved. 370 412 * 371 - * Only physical pages that actually reside on @pgdat are marked. 413 + * The range must reside completely on the specified node. 
372 414 */ 373 415 int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 374 416 unsigned long size, int flags) 375 417 { 376 - int ret; 418 + unsigned long start, end; 377 419 378 - ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); 379 - if (ret < 0) 380 - return -ENOMEM; 381 - reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); 382 - return 0; 420 + start = PFN_DOWN(physaddr); 421 + end = PFN_UP(physaddr + size); 422 + 423 + return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); 383 424 } 384 425 385 426 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE ··· 390 433 * 391 434 * Partial pages will be reserved. 392 435 * 393 - * All physical pages within the range are marked, no matter what 394 - * node they reside on. 436 + * The range must be contiguous but may span node boundaries. 395 437 */ 396 438 int __init reserve_bootmem(unsigned long addr, unsigned long size, 397 439 int flags) 398 440 { 399 - bootmem_data_t *bdata; 400 - int ret; 441 + unsigned long start, end; 401 442 402 - list_for_each_entry(bdata, &bdata_list, list) { 403 - ret = can_reserve_bootmem_core(bdata, addr, size, flags); 404 - if (ret < 0) 405 - return ret; 406 - } 407 - list_for_each_entry(bdata, &bdata_list, list) 408 - reserve_bootmem_core(bdata, addr, size, flags); 443 + start = PFN_DOWN(addr); 444 + end = PFN_UP(addr + size); 409 445 410 - return 0; 446 + return mark_bootmem(start, end, 1, flags); 411 447 } 412 448 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ 413 449 ··· 613 663 if (start_nr != section_nr || end_nr != section_nr) { 614 664 printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n", 615 665 section_nr); 616 - free_bootmem_core(pgdat->bdata, __pa(ptr), size); 666 + free_bootmem_node(pgdat, __pa(ptr), size); 617 667 ptr = NULL; 618 668 } 619 669