/* Snapshot of this file taken at v2.6.16-rc2 (439 lines, 12 kB). */
1/* 2 * linux/mm/bootmem.c 3 * 4 * Copyright (C) 1999 Ingo Molnar 5 * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 6 * 7 * simple boot-time physical memory area allocator and 8 * free memory collector. It's used to deal with reserved 9 * system memory and memory holes as well. 10 */ 11 12#include <linux/mm.h> 13#include <linux/kernel_stat.h> 14#include <linux/swap.h> 15#include <linux/interrupt.h> 16#include <linux/init.h> 17#include <linux/bootmem.h> 18#include <linux/mmzone.h> 19#include <linux/module.h> 20#include <asm/dma.h> 21#include <asm/io.h> 22#include "internal.h" 23 24/* 25 * Access to this subsystem has to be serialized externally. (this is 26 * true for the boot process anyway) 27 */ 28unsigned long max_low_pfn; 29unsigned long min_low_pfn; 30unsigned long max_pfn; 31 32EXPORT_SYMBOL(max_pfn); /* This is exported so 33 * dma_get_required_mask(), which uses 34 * it, can be an inline function */ 35 36#ifdef CONFIG_CRASH_DUMP 37/* 38 * If we have booted due to a crash, max_pfn will be a very low value. We need 39 * to know the amount of memory that the previous kernel used. 40 */ 41unsigned long saved_max_pfn; 42#endif 43 44/* return the number of _pages_ that will be allocated for the boot bitmap */ 45unsigned long __init bootmem_bootmap_pages (unsigned long pages) 46{ 47 unsigned long mapsize; 48 49 mapsize = (pages+7)/8; 50 mapsize = (mapsize + ~PAGE_MASK) & PAGE_MASK; 51 mapsize >>= PAGE_SHIFT; 52 53 return mapsize; 54} 55 56/* 57 * Called once to set up the allocator itself. 
58 */ 59static unsigned long __init init_bootmem_core (pg_data_t *pgdat, 60 unsigned long mapstart, unsigned long start, unsigned long end) 61{ 62 bootmem_data_t *bdata = pgdat->bdata; 63 unsigned long mapsize = ((end - start)+7)/8; 64 65 pgdat->pgdat_next = pgdat_list; 66 pgdat_list = pgdat; 67 68 mapsize = ALIGN(mapsize, sizeof(long)); 69 bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT); 70 bdata->node_boot_start = (start << PAGE_SHIFT); 71 bdata->node_low_pfn = end; 72 73 /* 74 * Initially all pages are reserved - setup_arch() has to 75 * register free RAM areas explicitly. 76 */ 77 memset(bdata->node_bootmem_map, 0xff, mapsize); 78 79 return mapsize; 80} 81 82/* 83 * Marks a particular physical memory range as unallocatable. Usable RAM 84 * might be used for boot-time allocations - or it might get added 85 * to the free page pool later on. 86 */ 87static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) 88{ 89 unsigned long i; 90 /* 91 * round up, partially reserved pages are considered 92 * fully reserved. 93 */ 94 unsigned long sidx = (addr - bdata->node_boot_start)/PAGE_SIZE; 95 unsigned long eidx = (addr + size - bdata->node_boot_start + 96 PAGE_SIZE-1)/PAGE_SIZE; 97 unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE; 98 99 BUG_ON(!size); 100 BUG_ON(sidx >= eidx); 101 BUG_ON((addr >> PAGE_SHIFT) >= bdata->node_low_pfn); 102 BUG_ON(end > bdata->node_low_pfn); 103 104 for (i = sidx; i < eidx; i++) 105 if (test_and_set_bit(i, bdata->node_bootmem_map)) { 106#ifdef CONFIG_DEBUG_BOOTMEM 107 printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE); 108#endif 109 } 110} 111 112static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) 113{ 114 unsigned long i; 115 unsigned long start; 116 /* 117 * round down end of usable mem, partially free pages are 118 * considered reserved. 
119 */ 120 unsigned long sidx; 121 unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE; 122 unsigned long end = (addr + size)/PAGE_SIZE; 123 124 BUG_ON(!size); 125 BUG_ON(end > bdata->node_low_pfn); 126 127 if (addr < bdata->last_success) 128 bdata->last_success = addr; 129 130 /* 131 * Round up the beginning of the address. 132 */ 133 start = (addr + PAGE_SIZE-1) / PAGE_SIZE; 134 sidx = start - (bdata->node_boot_start/PAGE_SIZE); 135 136 for (i = sidx; i < eidx; i++) { 137 if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map))) 138 BUG(); 139 } 140} 141 142/* 143 * We 'merge' subsequent allocations to save space. We might 'lose' 144 * some fraction of a page if allocations cannot be satisfied due to 145 * size constraints on boxes where there is physical RAM space 146 * fragmentation - in these cases (mostly large memory boxes) this 147 * is not a problem. 148 * 149 * On low memory boxes we get it right in 100% of the cases. 150 * 151 * alignment has to be a power of 2 value. 152 * 153 * NOTE: This function is _not_ reentrant. 
154 */ 155static void * __init 156__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, 157 unsigned long align, unsigned long goal, unsigned long limit) 158{ 159 unsigned long offset, remaining_size, areasize, preferred; 160 unsigned long i, start = 0, incr, eidx, end_pfn = bdata->node_low_pfn; 161 void *ret; 162 163 if(!size) { 164 printk("__alloc_bootmem_core(): zero-sized request\n"); 165 BUG(); 166 } 167 BUG_ON(align & (align-1)); 168 169 if (limit && bdata->node_boot_start >= limit) 170 return NULL; 171 172 limit >>=PAGE_SHIFT; 173 if (limit && end_pfn > limit) 174 end_pfn = limit; 175 176 eidx = end_pfn - (bdata->node_boot_start >> PAGE_SHIFT); 177 offset = 0; 178 if (align && 179 (bdata->node_boot_start & (align - 1UL)) != 0) 180 offset = (align - (bdata->node_boot_start & (align - 1UL))); 181 offset >>= PAGE_SHIFT; 182 183 /* 184 * We try to allocate bootmem pages above 'goal' 185 * first, then we try to allocate lower pages. 186 */ 187 if (goal && (goal >= bdata->node_boot_start) && 188 ((goal >> PAGE_SHIFT) < end_pfn)) { 189 preferred = goal - bdata->node_boot_start; 190 191 if (bdata->last_success >= preferred) 192 if (!limit || (limit && limit > bdata->last_success)) 193 preferred = bdata->last_success; 194 } else 195 preferred = 0; 196 197 preferred = ALIGN(preferred, align) >> PAGE_SHIFT; 198 preferred += offset; 199 areasize = (size+PAGE_SIZE-1)/PAGE_SIZE; 200 incr = align >> PAGE_SHIFT ? 
: 1; 201 202restart_scan: 203 for (i = preferred; i < eidx; i += incr) { 204 unsigned long j; 205 i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i); 206 i = ALIGN(i, incr); 207 if (i >= eidx) 208 break; 209 if (test_bit(i, bdata->node_bootmem_map)) 210 continue; 211 for (j = i + 1; j < i + areasize; ++j) { 212 if (j >= eidx) 213 goto fail_block; 214 if (test_bit (j, bdata->node_bootmem_map)) 215 goto fail_block; 216 } 217 start = i; 218 goto found; 219 fail_block: 220 i = ALIGN(j, incr); 221 } 222 223 if (preferred > offset) { 224 preferred = offset; 225 goto restart_scan; 226 } 227 return NULL; 228 229found: 230 bdata->last_success = start << PAGE_SHIFT; 231 BUG_ON(start >= eidx); 232 233 /* 234 * Is the next page of the previous allocation-end the start 235 * of this allocation's buffer? If yes then we can 'merge' 236 * the previous partial page with this allocation. 237 */ 238 if (align < PAGE_SIZE && 239 bdata->last_offset && bdata->last_pos+1 == start) { 240 offset = ALIGN(bdata->last_offset, align); 241 BUG_ON(offset > PAGE_SIZE); 242 remaining_size = PAGE_SIZE-offset; 243 if (size < remaining_size) { 244 areasize = 0; 245 /* last_pos unchanged */ 246 bdata->last_offset = offset+size; 247 ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + 248 bdata->node_boot_start); 249 } else { 250 remaining_size = size - remaining_size; 251 areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE; 252 ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + 253 bdata->node_boot_start); 254 bdata->last_pos = start+areasize-1; 255 bdata->last_offset = remaining_size; 256 } 257 bdata->last_offset &= ~PAGE_MASK; 258 } else { 259 bdata->last_pos = start + areasize - 1; 260 bdata->last_offset = size & ~PAGE_MASK; 261 ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start); 262 } 263 264 /* 265 * Reserve the area now: 266 */ 267 for (i = start; i < start+areasize; i++) 268 if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map))) 269 BUG(); 270 memset(ret, 0, size); 
271 return ret; 272} 273 274static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat) 275{ 276 struct page *page; 277 unsigned long pfn; 278 bootmem_data_t *bdata = pgdat->bdata; 279 unsigned long i, count, total = 0; 280 unsigned long idx; 281 unsigned long *map; 282 int gofast = 0; 283 284 BUG_ON(!bdata->node_bootmem_map); 285 286 count = 0; 287 /* first extant page of the node */ 288 pfn = bdata->node_boot_start >> PAGE_SHIFT; 289 idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); 290 map = bdata->node_bootmem_map; 291 /* Check physaddr is O(LOG2(BITS_PER_LONG)) page aligned */ 292 if (bdata->node_boot_start == 0 || 293 ffs(bdata->node_boot_start) - PAGE_SHIFT > ffs(BITS_PER_LONG)) 294 gofast = 1; 295 for (i = 0; i < idx; ) { 296 unsigned long v = ~map[i / BITS_PER_LONG]; 297 298 if (gofast && v == ~0UL) { 299 int order; 300 301 page = pfn_to_page(pfn); 302 count += BITS_PER_LONG; 303 order = ffs(BITS_PER_LONG) - 1; 304 __free_pages_bootmem(page, order); 305 i += BITS_PER_LONG; 306 page += BITS_PER_LONG; 307 } else if (v) { 308 unsigned long m; 309 310 page = pfn_to_page(pfn); 311 for (m = 1; m && i < idx; m<<=1, page++, i++) { 312 if (v & m) { 313 count++; 314 __free_pages_bootmem(page, 0); 315 } 316 } 317 } else { 318 i+=BITS_PER_LONG; 319 } 320 pfn += BITS_PER_LONG; 321 } 322 total += count; 323 324 /* 325 * Now free the allocator bitmap itself, it's not 326 * needed anymore: 327 */ 328 page = virt_to_page(bdata->node_bootmem_map); 329 count = 0; 330 for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) { 331 count++; 332 __free_pages_bootmem(page, 0); 333 } 334 total += count; 335 bdata->node_bootmem_map = NULL; 336 337 return total; 338} 339 340unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn) 341{ 342 return(init_bootmem_core(pgdat, freepfn, startpfn, endpfn)); 343} 344 345void __init 
reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size) 346{ 347 reserve_bootmem_core(pgdat->bdata, physaddr, size); 348} 349 350void __init free_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size) 351{ 352 free_bootmem_core(pgdat->bdata, physaddr, size); 353} 354 355unsigned long __init free_all_bootmem_node (pg_data_t *pgdat) 356{ 357 return(free_all_bootmem_core(pgdat)); 358} 359 360unsigned long __init init_bootmem (unsigned long start, unsigned long pages) 361{ 362 max_low_pfn = pages; 363 min_low_pfn = start; 364 return(init_bootmem_core(NODE_DATA(0), start, 0, pages)); 365} 366 367#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE 368void __init reserve_bootmem (unsigned long addr, unsigned long size) 369{ 370 reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size); 371} 372#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ 373 374void __init free_bootmem (unsigned long addr, unsigned long size) 375{ 376 free_bootmem_core(NODE_DATA(0)->bdata, addr, size); 377} 378 379unsigned long __init free_all_bootmem (void) 380{ 381 return(free_all_bootmem_core(NODE_DATA(0))); 382} 383 384void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal) 385{ 386 pg_data_t *pgdat = pgdat_list; 387 void *ptr; 388 389 for_each_pgdat(pgdat) 390 if ((ptr = __alloc_bootmem_core(pgdat->bdata, size, 391 align, goal, 0))) 392 return(ptr); 393 394 /* 395 * Whoops, we cannot satisfy the allocation request. 
396 */ 397 printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size); 398 panic("Out of memory"); 399 return NULL; 400} 401 402 403void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, 404 unsigned long goal) 405{ 406 void *ptr; 407 408 ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); 409 if (ptr) 410 return (ptr); 411 412 return __alloc_bootmem(size, align, goal); 413} 414 415#define LOW32LIMIT 0xffffffff 416 417void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal) 418{ 419 pg_data_t *pgdat = pgdat_list; 420 void *ptr; 421 422 for_each_pgdat(pgdat) 423 if ((ptr = __alloc_bootmem_core(pgdat->bdata, size, 424 align, goal, LOW32LIMIT))) 425 return(ptr); 426 427 /* 428 * Whoops, we cannot satisfy the allocation request. 429 */ 430 printk(KERN_ALERT "low bootmem alloc of %lu bytes failed!\n", size); 431 panic("Out of low memory"); 432 return NULL; 433} 434 435void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, 436 unsigned long align, unsigned long goal) 437{ 438 return __alloc_bootmem_core(pgdat->bdata, size, align, goal, LOW32LIMIT); 439}