Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] sparsemem memory model for i386

Provide the architecture specific implementation for SPARSEMEM for i386 SMP
and NUMA systems.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Martin Bligh <mbligh@aracnet.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Andy Whitcroft; committed by Linus Torvalds.
05b79bdc d41dee36

+137 -83
+17 -7
arch/i386/Kconfig
··· 68 68 69 69 config X86_NUMAQ 70 70 bool "NUMAQ (IBM/Sequent)" 71 - select DISCONTIGMEM 72 71 select NUMA 73 72 help 74 73 This option is used for getting Linux to run on a (IBM/Sequent) NUMA ··· 782 783 comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" 783 784 depends on X86_SUMMIT && (!HIGHMEM64G || !ACPI) 784 785 785 - config ARCH_DISCONTIGMEM_ENABLE 786 - bool 787 - depends on NUMA 788 - default y 789 - 790 786 config HAVE_ARCH_BOOTMEM_NODE 791 787 bool 792 788 depends on NUMA ··· 794 800 795 801 config NEED_NODE_MEMMAP_SIZE 796 802 bool 797 - depends on DISCONTIGMEM 803 + depends on DISCONTIGMEM || SPARSEMEM 798 804 default y 799 805 800 806 config HAVE_ARCH_ALLOC_REMAP 801 807 bool 802 808 depends on NUMA 803 809 default y 810 + 811 + config ARCH_DISCONTIGMEM_ENABLE 812 + def_bool y 813 + depends on NUMA 814 + 815 + config ARCH_DISCONTIGMEM_DEFAULT 816 + def_bool y 817 + depends on NUMA 818 + 819 + config ARCH_SPARSEMEM_ENABLE 820 + def_bool y 821 + depends on NUMA 822 + 823 + config ARCH_SELECT_MEMORY_MODEL 824 + def_bool y 825 + depends on ARCH_SPARSEMEM_ENABLE 804 826 805 827 source "mm/Kconfig" 806 828
+5 -3
arch/i386/kernel/setup.c
··· 25 25 26 26 #include <linux/sched.h> 27 27 #include <linux/mm.h> 28 + #include <linux/mmzone.h> 28 29 #include <linux/tty.h> 29 30 #include <linux/ioport.h> 30 31 #include <linux/acpi.h> ··· 1023 1022 reserve_bootmem(addr, PAGE_SIZE); 1024 1023 } 1025 1024 1026 - #ifndef CONFIG_DISCONTIGMEM 1025 + #ifndef CONFIG_NEED_MULTIPLE_NODES 1027 1026 void __init setup_bootmem_allocator(void); 1028 1027 static unsigned long __init setup_memory(void) 1029 1028 { ··· 1073 1072 free_area_init(zones_size); 1074 1073 } 1075 1074 #else 1076 - extern unsigned long setup_memory(void); 1075 + extern unsigned long __init setup_memory(void); 1077 1076 extern void zone_sizes_init(void); 1078 - #endif /* !CONFIG_DISCONTIGMEM */ 1077 + #endif /* !CONFIG_NEED_MULTIPLE_NODES */ 1079 1078 1080 1079 void __init setup_bootmem_allocator(void) 1081 1080 { ··· 1476 1475 #endif 1477 1476 paging_init(); 1478 1477 remapped_pgdat_init(); 1478 + sparse_init(); 1479 1479 zone_sizes_init(); 1480 1480 1481 1481 /*
+1 -1
arch/i386/mm/Makefile
··· 4 4 5 5 obj-y := init.o pgtable.o fault.o ioremap.o extable.o pageattr.o mmap.o 6 6 7 - obj-$(CONFIG_DISCONTIGMEM) += discontig.o 7 + obj-$(CONFIG_NUMA) += discontig.o 8 8 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 9 9 obj-$(CONFIG_HIGHMEM) += highmem.o 10 10 obj-$(CONFIG_BOOT_IOREMAP) += boot_ioremap.o
+20 -16
arch/i386/mm/discontig.c
··· 42 42 * populated the following initialisation. 43 43 * 44 44 * 1) node_online_map - the map of all nodes configured (online) in the system 45 - * 2) physnode_map - the mapping between a pfn and owning node 46 - * 3) node_start_pfn - the starting page frame number for a node 45 - * 2) node_start_pfn - the starting page frame number for a node 47 46 * 3) node_end_pfn - the ending page fram number for a node 48 47 */ 48 + unsigned long node_start_pfn[MAX_NUMNODES]; 49 + unsigned long node_end_pfn[MAX_NUMNODES]; 49 50 51 + 52 + #ifdef CONFIG_DISCONTIGMEM 50 53 /* 54 + * 4) physnode_map - the mapping between a pfn and owning node 51 55 * physnode_map keeps track of the physical memory layout of a generic 52 56 * numa node on a 256Mb break (each element of the array will 53 57 * represent 256Mb of memory and will be marked by the node id. so, ··· 89 85 90 86 return (nr_pages + 1) * sizeof(struct page); 91 87 } 92 - 93 - unsigned long node_start_pfn[MAX_NUMNODES]; 94 - unsigned long node_end_pfn[MAX_NUMNODES]; 88 + #endif 95 89 96 90 extern unsigned long find_max_low_pfn(void); 97 91 extern void find_max_pfn(void); ··· 392 390 { 393 391 #ifdef CONFIG_HIGHMEM 394 392 struct zone *zone; 393 + struct page *page; 395 394 396 395 for_each_zone(zone) { 397 - unsigned long node_pfn, node_high_size, zone_start_pfn; 398 - struct page * zone_mem_map; 399 - 396 + unsigned long node_pfn, zone_start_pfn, zone_end_pfn; 397 + 400 398 if (!is_highmem(zone)) 401 399 continue; 402 400 403 - printk("Initializing %s for node %d\n", zone->name, 404 - zone->zone_pgdat->node_id); 405 - 406 - node_high_size = zone->spanned_pages; 407 - zone_mem_map = zone->zone_mem_map; 408 401 zone_start_pfn = zone->zone_start_pfn; 402 + zone_end_pfn = zone_start_pfn + zone->spanned_pages; 409 403 410 - for (node_pfn = 0; node_pfn < node_high_size; node_pfn++) { 411 - one_highpage_init((struct page *)(zone_mem_map + node_pfn), 412 - zone_start_pfn + node_pfn, bad_ppro); 404 + printk("Initializing %s for node %d (%08lx:%08lx)\n", 405 + zone->name, zone->zone_pgdat->node_id, 406 + zone_start_pfn, zone_end_pfn); 407 + 408 + for (node_pfn = zone_start_pfn; node_pfn < zone_end_pfn; node_pfn++) { 409 + if (!pfn_valid(node_pfn)) 410 + continue; 411 + page = pfn_to_page(node_pfn); 412 + one_highpage_init(page, node_pfn, bad_ppro); 413 413 } 414 414 } 415 415 totalram_pages += totalhigh_pages;
+9 -9
arch/i386/mm/init.c
··· 276 276 SetPageReserved(page); 277 277 } 278 278 279 - #ifndef CONFIG_DISCONTIGMEM 279 + #ifdef CONFIG_NUMA 280 + extern void set_highmem_pages_init(int); 281 + #else 280 282 static void __init set_highmem_pages_init(int bad_ppro) 281 283 { 282 284 int pfn; ··· 286 284 one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); 287 285 totalram_pages += totalhigh_pages; 288 286 } 289 - #else 290 - extern void set_highmem_pages_init(int); 291 - #endif /* !CONFIG_DISCONTIGMEM */ 287 + #endif /* CONFIG_FLATMEM */ 292 288 293 289 #else 294 290 #define kmap_init() do { } while (0) ··· 297 297 unsigned long long __PAGE_KERNEL = _PAGE_KERNEL; 298 298 unsigned long long __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC; 299 299 300 - #ifndef CONFIG_DISCONTIGMEM 301 - #define remap_numa_kva() do {} while (0) 302 - #else 300 + #ifdef CONFIG_NUMA 303 301 extern void __init remap_numa_kva(void); 302 + #else 303 + #define remap_numa_kva() do {} while (0) 304 304 #endif 305 305 306 306 static void __init pagetable_init (void) ··· 525 525 #else 526 526 num_physpages = max_low_pfn; 527 527 #endif 528 - #ifndef CONFIG_DISCONTIGMEM 528 + #ifdef CONFIG_FLATMEM 529 529 max_mapnr = num_physpages; 530 530 #endif 531 531 } ··· 539 539 int tmp; 540 540 int bad_ppro; 541 541 542 - #ifndef CONFIG_DISCONTIGMEM 542 + #ifdef CONFIG_FLATMEM 543 543 if (!mem_map) 544 544 BUG(); 545 545 #endif
+50 -43
include/asm-i386/mmzone.h
··· 8 8 9 9 #include <asm/smp.h> 10 10 11 - #ifdef CONFIG_DISCONTIGMEM 11 + #if CONFIG_NUMA 12 + extern struct pglist_data *node_data[]; 13 + #define NODE_DATA(nid) (node_data[nid]) 12 14 13 15 #ifdef CONFIG_NUMA 14 16 #ifdef CONFIG_X86_NUMAQ ··· 23 21 #define get_zholes_size(n) (0) 24 22 #endif /* CONFIG_NUMA */ 25 23 26 - extern struct pglist_data *node_data[]; 27 - #define NODE_DATA(nid) (node_data[nid]) 24 + extern int get_memcfg_numa_flat(void ); 25 + /* 26 + * This allows any one NUMA architecture to be compiled 27 + * for, and still fall back to the flat function if it 28 + * fails. 29 + */ 30 + static inline void get_memcfg_numa(void) 31 + { 32 + #ifdef CONFIG_X86_NUMAQ 33 + if (get_memcfg_numaq()) 34 + return; 35 + #elif CONFIG_ACPI_SRAT 36 + if (get_memcfg_from_srat()) 37 + return; 38 + #endif 39 + 40 + get_memcfg_numa_flat(); 41 + } 42 + 43 + #endif /* CONFIG_NUMA */ 44 + 45 + #ifdef CONFIG_DISCONTIGMEM 28 46 29 47 /* 30 48 * generic node memory support, the following assumptions apply: ··· 69 47 return 0; 70 48 #endif 71 49 } 72 - 73 - /* 74 - * Following are macros that are specific to this numa platform. 75 - */ 76 - #define reserve_bootmem(addr, size) \ 77 - reserve_bootmem_node(NODE_DATA(0), (addr), (size)) 78 - #define alloc_bootmem(x) \ 79 - __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) 80 - #define alloc_bootmem_low(x) \ 81 - __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0) 82 - #define alloc_bootmem_pages(x) \ 83 - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) 84 - #define alloc_bootmem_low_pages(x) \ 85 - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) 86 - #define alloc_bootmem_node(ignore, x) \ 87 - __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) 88 - #define alloc_bootmem_pages_node(ignore, x) \ 89 - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) 90 - #define alloc_bootmem_low_pages_node(ignore, x) \ 91 - __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) 92 50 93 51 #define node_localnr(pfn, nid) ((pfn) - node_data[nid]->node_start_pfn) 94 52 ··· 123 121 return (pfn < node_end_pfn(nid)); 124 122 return 0; 125 123 } 126 - #endif 127 - 128 - extern int get_memcfg_numa_flat(void ); 129 - /* 130 - * This allows any one NUMA architecture to be compiled 131 - * for, and still fall back to the flat function if it 132 - * fails. 133 - */ 134 - static inline void get_memcfg_numa(void) 135 - { 136 - #ifdef CONFIG_X86_NUMAQ 137 - if (get_memcfg_numaq()) 138 - return; 139 - #elif CONFIG_ACPI_SRAT 140 - if (get_memcfg_from_srat()) 141 - return; 142 - #endif 143 - 144 - get_memcfg_numa_flat(); 145 - } 124 + #endif /* CONFIG_X86_NUMAQ */ 146 125 147 126 #endif /* CONFIG_DISCONTIGMEM */ 127 + 128 + #ifdef CONFIG_NEED_MULTIPLE_NODES 129 + 130 + /* 131 + * Following are macros that are specific to this numa platform. 132 + */ 133 + #define reserve_bootmem(addr, size) \ 134 + reserve_bootmem_node(NODE_DATA(0), (addr), (size)) 135 + #define alloc_bootmem(x) \ 136 + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) 137 + #define alloc_bootmem_low(x) \ 138 + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0) 139 + #define alloc_bootmem_pages(x) \ 140 + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) 141 + #define alloc_bootmem_low_pages(x) \ 142 + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) 143 + #define alloc_bootmem_node(ignore, x) \ 144 + __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) 145 + #define alloc_bootmem_pages_node(ignore, x) \ 146 + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) 147 + #define alloc_bootmem_low_pages_node(ignore, x) \ 148 + __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) 149 + 150 + #endif /* CONFIG_NEED_MULTIPLE_NODES */ 148 151 149 152 extern int early_pfn_to_nid(unsigned long pfn); 150 153
+2 -2
include/asm-i386/page.h
··· 137 137 #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) 138 138 #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) 139 139 #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) 140 - #ifndef CONFIG_DISCONTIGMEM 140 + #ifdef CONFIG_FLATMEM 141 141 #define pfn_to_page(pfn) (mem_map + (pfn)) 142 142 #define page_to_pfn(page) ((unsigned long)((page) - mem_map)) 143 143 #define pfn_valid(pfn) ((pfn) < max_mapnr) 144 - #endif /* !CONFIG_DISCONTIGMEM */ 144 + #endif /* CONFIG_FLATMEM */ 145 145 #define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) 146 146 147 147 #define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+2 -2
include/asm-i386/pgtable.h
··· 398 398 399 399 #endif /* !__ASSEMBLY__ */ 400 400 401 - #ifndef CONFIG_DISCONTIGMEM 401 + #ifdef CONFIG_FLATMEM 402 402 #define kern_addr_valid(addr) (1) 403 - #endif /* !CONFIG_DISCONTIGMEM */ 403 + #endif /* CONFIG_FLATMEM */ 404 404 405 405 #define io_remap_page_range(vma, vaddr, paddr, size, prot) \ 406 406 remap_pfn_range(vma, vaddr, (paddr) >> PAGE_SHIFT, size, prot)
+31
include/asm-i386/sparsemem.h
··· 1 + #ifndef _I386_SPARSEMEM_H 2 + #define _I386_SPARSEMEM_H 3 + #ifdef CONFIG_SPARSEMEM 4 + 5 + /* 6 + * generic non-linear memory support: 7 + * 8 + * 1) we will not split memory into more chunks than will fit into the 9 + * flags field of the struct page 10 + */ 11 + 12 + /* 13 + * SECTION_SIZE_BITS 2^N: how big each section will be 14 + * MAX_PHYSADDR_BITS 2^N: how much physical address space we have 15 + * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space 16 + */ 17 + #ifdef CONFIG_X86_PAE 18 + #define SECTION_SIZE_BITS 30 19 + #define MAX_PHYSADDR_BITS 36 20 + #define MAX_PHYSMEM_BITS 36 21 + #else 22 + #define SECTION_SIZE_BITS 26 23 + #define MAX_PHYSADDR_BITS 32 24 + #define MAX_PHYSMEM_BITS 32 25 + #endif 26 + 27 + /* XXX: FIXME -- wli */ 28 + #define kern_addr_valid(kaddr) (0) 29 + 30 + #endif /* CONFIG_SPARSEMEM */ 31 + #endif /* _I386_SPARSEMEM_H */