Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] memory hotplug locking: node_size_lock

pgdat->node_size_lock is basically only needed in one place in the normal
code: show_mem(), which is the arch-specific sysrq-m printing function.

Strictly speaking, the architectures not doing memory hotplug do not need this
locking in show_mem(). However, they are all included for completeness. This
should also make any future consolidation of all of the implementations a
little more straightforward.

This lock is also held in the sparsemem code during a memory removal, as
sections are invalidated. This is the place where pfn_valid() is made false
for a memory area that's being removed. The lock is only required when doing
pfn_valid() operations on memory which the user does not already have a
reference on the page, such as in show_mem().

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Dave Hansen and committed by
Linus Torvalds
208d54e5 c6a57e19

+76 -2
+3
arch/alpha/mm/numa.c
··· 371 371 show_free_areas(); 372 372 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); 373 373 for_each_online_node(nid) { 374 + unsigned long flags; 375 + pgdat_resize_lock(NODE_DATA(nid), &flags); 374 376 i = node_spanned_pages(nid); 375 377 while (i-- > 0) { 376 378 struct page *page = nid_page_nr(nid, i); ··· 386 384 else 387 385 shared += page_count(page) - 1; 388 386 } 387 + pgdat_resize_unlock(NODE_DATA(nid), &flags); 389 388 } 390 389 printk("%ld pages of RAM\n",total); 391 390 printk("%ld free pages\n",free);
+3
arch/i386/mm/pgtable.c
··· 31 31 pg_data_t *pgdat; 32 32 unsigned long i; 33 33 struct page_state ps; 34 + unsigned long flags; 34 35 35 36 printk(KERN_INFO "Mem-info:\n"); 36 37 show_free_areas(); 37 38 printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); 38 39 for_each_pgdat(pgdat) { 40 + pgdat_resize_lock(pgdat, &flags); 39 41 for (i = 0; i < pgdat->node_spanned_pages; ++i) { 40 42 page = pgdat_page_nr(pgdat, i); 41 43 total++; ··· 50 48 else if (page_count(page)) 51 49 shared += page_count(page) - 1; 52 50 } 51 + pgdat_resize_unlock(pgdat, &flags); 53 52 } 54 53 printk(KERN_INFO "%d pages of RAM\n", total); 55 54 printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
+6 -1
arch/ia64/mm/discontig.c
··· 555 555 show_free_areas(); 556 556 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); 557 557 for_each_pgdat(pgdat) { 558 - unsigned long present = pgdat->node_present_pages; 558 + unsigned long present; 559 + unsigned long flags; 559 560 int shared = 0, cached = 0, reserved = 0; 561 + 560 562 printk("Node ID: %d\n", pgdat->node_id); 563 + pgdat_resize_lock(pgdat, &flags); 564 + present = pgdat->node_present_pages; 561 565 for(i = 0; i < pgdat->node_spanned_pages; i++) { 562 566 struct page *page; 563 567 if (pfn_valid(pgdat->node_start_pfn + i)) ··· 575 571 else if (page_count(page)) 576 572 shared += page_count(page)-1; 577 573 } 574 + pgdat_resize_unlock(pgdat, &flags); 578 575 total_present += present; 579 576 total_reserved += reserved; 580 577 total_cached += cached;
+8 -1
arch/m32r/mm/init.c
··· 48 48 show_free_areas(); 49 49 printk("Free swap: %6ldkB\n",nr_swap_pages<<(PAGE_SHIFT-10)); 50 50 for_each_pgdat(pgdat) { 51 + unsigned long flags; 52 + pgdat_resize_lock(pgdat, &flags); 51 53 for (i = 0; i < pgdat->node_spanned_pages; ++i) { 52 54 page = pgdat_page_nr(pgdat, i); 53 55 total++; ··· 62 60 else if (page_count(page)) 63 61 shared += page_count(page) - 1; 64 62 } 63 + pgdat_resize_unlock(pgdat, &flags); 65 64 } 66 65 printk("%d pages of RAM\n", total); 67 66 printk("%d pages of HIGHMEM\n",highmem); ··· 153 150 int reservedpages, nid, i; 154 151 155 152 reservedpages = 0; 156 - for_each_online_node(nid) 153 + for_each_online_node(nid) { 154 + unsigned long flags; 155 + pgdat_resize_lock(NODE_DATA(nid), &flags); 157 156 for (i = 0 ; i < MAX_LOW_PFN(nid) - START_PFN(nid) ; i++) 158 157 if (PageReserved(nid_page_nr(nid, i))) 159 158 reservedpages++; 159 + pgdat_resize_unlock(NODE_DATA(nid), &flags); 160 + } 160 161 161 162 return reservedpages; 162 163 }
+3
arch/parisc/mm/init.c
··· 505 505 506 506 for (j = node_start_pfn(i); j < node_end_pfn(i); j++) { 507 507 struct page *p; 508 + unsigned long flags; 508 509 510 + pgdat_resize_lock(NODE_DATA(i), &flags); 509 511 p = nid_page_nr(i, j) - node_start_pfn(i); 510 512 511 513 total++; ··· 519 517 free++; 520 518 else 521 519 shared += page_count(p) - 1; 520 + pgdat_resize_unlock(NODE_DATA(i), &flags); 522 521 } 523 522 } 524 523 #endif
+6
arch/ppc64/mm/init.c
··· 104 104 show_free_areas(); 105 105 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); 106 106 for_each_pgdat(pgdat) { 107 + unsigned long flags; 108 + pgdat_resize_lock(pgdat, &flags); 107 109 for (i = 0; i < pgdat->node_spanned_pages; i++) { 108 110 page = pgdat_page_nr(pgdat, i); 109 111 total++; ··· 116 114 else if (page_count(page)) 117 115 shared += page_count(page) - 1; 118 116 } 117 + pgdat_resize_unlock(pgdat, &flags); 119 118 } 120 119 printk("%ld pages of RAM\n", total); 121 120 printk("%ld reserved pages\n", reserved); ··· 650 647 #endif 651 648 652 649 for_each_pgdat(pgdat) { 650 + unsigned long flags; 651 + pgdat_resize_lock(pgdat, &flags); 653 652 for (i = 0; i < pgdat->node_spanned_pages; i++) { 654 653 page = pgdat_page_nr(pgdat, i); 655 654 if (PageReserved(page)) 656 655 reservedpages++; 657 656 } 657 + pgdat_resize_unlock(pgdat, &flags); 658 658 } 659 659 660 660 codesize = (unsigned long)&_etext - (unsigned long)&_stext;
+34
include/linux/memory_hotplug.h
··· 1 + #ifndef __LINUX_MEMORY_HOTPLUG_H 2 + #define __LINUX_MEMORY_HOTPLUG_H 3 + 4 + #include <linux/mmzone.h> 5 + #include <linux/spinlock.h> 6 + 7 + #ifdef CONFIG_MEMORY_HOTPLUG 8 + /* 9 + * pgdat resizing functions 10 + */ 11 + static inline 12 + void pgdat_resize_lock(struct pglist_data *pgdat, unsigned long *flags) 13 + { 14 + spin_lock_irqsave(&pgdat->node_size_lock, *flags); 15 + } 16 + static inline 17 + void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags) 18 + { 19 + spin_unlock_irqrestore(&pgdat->node_size_lock, *flags); 20 + } 21 + static inline 22 + void pgdat_resize_init(struct pglist_data *pgdat) 23 + { 24 + spin_lock_init(&pgdat->node_size_lock); 25 + } 26 + #else /* ! CONFIG_MEMORY_HOTPLUG */ 27 + /* 28 + * Stub functions for when hotplug is off 29 + */ 30 + static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {} 31 + static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {} 32 + static inline void pgdat_resize_init(struct pglist_data *pgdat) {} 33 + #endif 34 + #endif /* __LINUX_MEMORY_HOTPLUG_H */
+12
include/linux/mmzone.h
··· 273 273 struct page *node_mem_map; 274 274 #endif 275 275 struct bootmem_data *bdata; 276 + #ifdef CONFIG_MEMORY_HOTPLUG 277 + /* 278 + * Must be held any time you expect node_start_pfn, node_present_pages 279 + * or node_spanned_pages stay constant. Holding this will also 280 + * guarantee that any pfn_valid() stays that way. 281 + * 282 + * Nests above zone->lock and zone->size_seqlock. 283 + */ 284 + spinlock_t node_size_lock; 285 + #endif 276 286 unsigned long node_start_pfn; 277 287 unsigned long node_present_pages; /* total number of physical pages */ 278 288 unsigned long node_spanned_pages; /* total size of physical page ··· 302 292 #define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr)) 303 293 #endif 304 294 #define nid_page_nr(nid, pagenr) pgdat_page_nr(NODE_DATA(nid),(pagenr)) 295 + 296 + #include <linux/memory_hotplug.h> 305 297 306 298 extern struct pglist_data *pgdat_list; 307 299
+1
mm/page_alloc.c
··· 1958 1958 int nid = pgdat->node_id; 1959 1959 unsigned long zone_start_pfn = pgdat->node_start_pfn; 1960 1960 1961 + pgdat_resize_init(pgdat); 1961 1962 pgdat->nr_zones = 0; 1962 1963 init_waitqueue_head(&pgdat->kswapd_wait); 1963 1964 pgdat->kswapd_max_order = 0;