Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] memory hotplug locking: zone span seqlock

See the "fixup bad_range()" patch for more information, but this actually
creates the lock to protect things making assumptions about a zone's size
staying constant at runtime.

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Dave Hansen and committed by Linus Torvalds
bdc8cb98 208d54e5

+66 -7
+37 -2
include/linux/memory_hotplug.h
··· 16 16 static inline 17 17 void pgdat_resize_unlock(struct pglist_data *pgdat, unsigned long *flags) 18 18 { 19 - spin_lock_irqrestore(&pgdat->node_size_lock, *flags); 19 + spin_unlock_irqrestore(&pgdat->node_size_lock, *flags); 20 20 } 21 21 static inline 22 22 void pgdat_resize_init(struct pglist_data *pgdat) 23 23 { 24 24 spin_lock_init(&pgdat->node_size_lock); 25 + } 26 + /* 27 + * Zone resizing functions 28 + */ 29 + static inline unsigned zone_span_seqbegin(struct zone *zone) 30 + { 31 + return read_seqbegin(&zone->span_seqlock); 32 + } 33 + static inline int zone_span_seqretry(struct zone *zone, unsigned iv) 34 + { 35 + return read_seqretry(&zone->span_seqlock, iv); 36 + } 37 + static inline void zone_span_writelock(struct zone *zone) 38 + { 39 + write_seqlock(&zone->span_seqlock); 40 + } 41 + static inline void zone_span_writeunlock(struct zone *zone) 42 + { 43 + write_sequnlock(&zone->span_seqlock); 44 + } 45 + static inline void zone_seqlock_init(struct zone *zone) 46 + { 47 + seqlock_init(&zone->span_seqlock); 25 48 } 26 49 #else /* ! CONFIG_MEMORY_HOTPLUG */ 27 50 /* ··· 53 30 static inline void pgdat_resize_lock(struct pglist_data *p, unsigned long *f) {} 54 31 static inline void pgdat_resize_unlock(struct pglist_data *p, unsigned long *f) {} 55 32 static inline void pgdat_resize_init(struct pglist_data *pgdat) {} 56 - #endif 33 + 34 + static inline unsigned zone_span_seqbegin(struct zone *zone) 35 + { 36 + return 0; 37 + } 38 + static inline int zone_span_seqretry(struct zone *zone, unsigned iv) 39 + { 40 + return 0; 41 + } 42 + static inline void zone_span_writelock(struct zone *zone) {} 43 + static inline void zone_span_writeunlock(struct zone *zone) {} 44 + static inline void zone_seqlock_init(struct zone *zone) {} 45 + #endif /* ! CONFIG_MEMORY_HOTPLUG */ 57 46 #endif /* __LINUX_MEMORY_HOTPLUG_H */
+15
include/linux/mmzone.h
··· 12 12 #include <linux/threads.h> 13 13 #include <linux/numa.h> 14 14 #include <linux/init.h> 15 + #include <linux/seqlock.h> 15 16 #include <asm/atomic.h> 16 17 17 18 /* Free memory management - zoned buddy allocator. */ ··· 138 137 * free areas of different sizes 139 138 */ 140 139 spinlock_t lock; 140 + #ifdef CONFIG_MEMORY_HOTPLUG 141 + /* see spanned/present_pages for more description */ 142 + seqlock_t span_seqlock; 143 + #endif 141 144 struct free_area free_area[MAX_ORDER]; 142 145 143 146 ··· 225 220 /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ 226 221 unsigned long zone_start_pfn; 227 222 223 + /* 224 + * zone_start_pfn, spanned_pages and present_pages are all 225 + * protected by span_seqlock. It is a seqlock because it has 226 + * to be read outside of zone->lock, and it is done in the main 227 + * allocator path. But, it is written quite infrequently. 228 + * 229 + * The lock is declared along with zone->lock because it is 230 + * frequently read in proximity to zone->lock. It's good to 231 + * give them a chance of being in the same cacheline. 232 + */ 228 233 unsigned long spanned_pages; /* total size, including holes */ 229 234 unsigned long present_pages; /* amount of memory (excluding holes) */ 230 235
+14 -5
mm/page_alloc.c
··· 33 33 #include <linux/sysctl.h> 34 34 #include <linux/cpu.h> 35 35 #include <linux/cpuset.h> 36 + #include <linux/memory_hotplug.h> 36 37 #include <linux/nodemask.h> 37 38 #include <linux/vmalloc.h> 38 39 ··· 81 80 82 81 static int page_outside_zone_boundaries(struct zone *zone, struct page *page) 83 82 { 84 - if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages) 85 - return 1; 86 - if (page_to_pfn(page) < zone->zone_start_pfn) 87 - return 1; 83 + int ret = 0; 84 + unsigned seq; 85 + unsigned long pfn = page_to_pfn(page); 88 86 89 - return 0; 87 + do { 88 + seq = zone_span_seqbegin(zone); 89 + if (pfn >= zone->zone_start_pfn + zone->spanned_pages) 90 + ret = 1; 91 + else if (pfn < zone->zone_start_pfn) 92 + ret = 1; 93 + } while (zone_span_seqretry(zone, seq)); 94 + 95 + return ret; 90 96 } 91 97 92 98 static int page_is_consistent(struct zone *zone, struct page *page) ··· 1988 1980 zone->name = zone_names[j]; 1989 1981 spin_lock_init(&zone->lock); 1990 1982 spin_lock_init(&zone->lru_lock); 1983 + zone_seqlock_init(zone); 1991 1984 zone->zone_pgdat = pgdat; 1992 1985 zone->free_pages = 0; 1993 1986