Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

parisc: Switch from DISCONTIGMEM to SPARSEMEM

The commit 1c30844d2dfe ("mm: reclaim small amounts of memory when an
external fragmentation event occurs") breaks memory management on a
parisc c8000 workstation with this memory layout:

0) Start 0x0000000000000000 End 0x000000003fffffff Size 1024 MB
1) Start 0x0000000100000000 End 0x00000001bfdfffff Size 3070 MB
2) Start 0x0000004040000000 End 0x00000040ffffffff Size 3072 MB

With the patch 1c30844d2dfe, the kernel will incorrectly reclaim the
first zone when it fills up, ignoring the fact that there are two
completely free zones. Basically, it limits cache size to 1GiB.

The parisc kernel is currently using the DISCONTIGMEM implementation,
but isn't NUMA. Avoid this issue or strange work-arounds by switching to
the more commonly used SPARSEMEM implementation.

Reported-by: Mikulas Patocka <mpatocka@redhat.com>
Fixes: 1c30844d2dfe ("mm: reclaim small amounts of memory when an external fragmentation event occurs")
Signed-off-by: Helge Deller <deller@gmx.de>

+68 -128
+4 -8
arch/parisc/Kconfig
··· 36 36 select GENERIC_STRNCPY_FROM_USER 37 37 select SYSCTL_ARCH_UNALIGN_ALLOW 38 38 select SYSCTL_EXCEPTION_TRACE 39 + select ARCH_DISCARD_MEMBLOCK 39 40 select HAVE_MOD_ARCH_SPECIFIC 40 41 select VIRT_TO_BUS 41 42 select MODULES_USE_ELF_RELA ··· 315 314 def_bool y 316 315 depends on 64BIT 317 316 318 - config ARCH_DISCONTIGMEM_ENABLE 317 + config ARCH_SPARSEMEM_ENABLE 319 318 def_bool y 320 319 depends on 64BIT 321 320 322 321 config ARCH_FLATMEM_ENABLE 323 322 def_bool y 324 323 325 - config ARCH_DISCONTIGMEM_DEFAULT 324 + config ARCH_SPARSEMEM_DEFAULT 326 325 def_bool y 327 - depends on ARCH_DISCONTIGMEM_ENABLE 328 - 329 - config NODES_SHIFT 330 - int 331 - default "3" 332 - depends on NEED_MULTIPLE_NODES 326 + depends on ARCH_SPARSEMEM_ENABLE 333 327 334 328 source "kernel/Kconfig.hz" 335 329
+1 -57
arch/parisc/include/asm/mmzone.h
··· 2 2 #ifndef _PARISC_MMZONE_H 3 3 #define _PARISC_MMZONE_H 4 4 5 - #define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */ 5 + #define MAX_PHYSMEM_RANGES 4 /* Fix the size for now (current known max is 3) */ 6 6 7 - #ifdef CONFIG_DISCONTIGMEM 8 - 9 - extern int npmem_ranges; 10 - 11 - struct node_map_data { 12 - pg_data_t pg_data; 13 - }; 14 - 15 - extern struct node_map_data node_data[]; 16 - 17 - #define NODE_DATA(nid) (&node_data[nid].pg_data) 18 - 19 - /* We have these possible memory map layouts: 20 - * Astro: 0-3.75, 67.75-68, 4-64 21 - * zx1: 0-1, 257-260, 4-256 22 - * Stretch (N-class): 0-2, 4-32, 34-xxx 23 - */ 24 - 25 - /* Since each 1GB can only belong to one region (node), we can create 26 - * an index table for pfn to nid lookup; each entry in pfnnid_map 27 - * represents 1GB, and contains the node that the memory belongs to. */ 28 - 29 - #define PFNNID_SHIFT (30 - PAGE_SHIFT) 30 - #define PFNNID_MAP_MAX 512 /* support 512GB */ 31 - extern signed char pfnnid_map[PFNNID_MAP_MAX]; 32 - 33 - #ifndef CONFIG_64BIT 34 - #define pfn_is_io(pfn) ((pfn & (0xf0000000UL >> PAGE_SHIFT)) == (0xf0000000UL >> PAGE_SHIFT)) 35 - #else 36 - /* io can be 0xf0f0f0f0f0xxxxxx or 0xfffffffff0000000 */ 37 - #define pfn_is_io(pfn) ((pfn & (0xf000000000000000UL >> PAGE_SHIFT)) == (0xf000000000000000UL >> PAGE_SHIFT)) 38 - #endif 39 - 40 - static inline int pfn_to_nid(unsigned long pfn) 41 - { 42 - unsigned int i; 43 - 44 - if (unlikely(pfn_is_io(pfn))) 45 - return 0; 46 - 47 - i = pfn >> PFNNID_SHIFT; 48 - BUG_ON(i >= ARRAY_SIZE(pfnnid_map)); 49 - 50 - return pfnnid_map[i]; 51 - } 52 - 53 - static inline int pfn_valid(int pfn) 54 - { 55 - int nid = pfn_to_nid(pfn); 56 - 57 - if (nid >= 0) 58 - return (pfn < node_end_pfn(nid)); 59 - return 0; 60 - } 61 - 62 - #endif 63 7 #endif /* _PARISC_MMZONE_H */
+2 -2
arch/parisc/include/asm/page.h
··· 147 147 #define __pa(x) ((unsigned long)(x)-PAGE_OFFSET) 148 148 #define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET)) 149 149 150 - #ifndef CONFIG_DISCONTIGMEM 150 + #ifndef CONFIG_SPARSEMEM 151 151 #define pfn_valid(pfn) ((pfn) < max_mapnr) 152 - #endif /* CONFIG_DISCONTIGMEM */ 152 + #endif 153 153 154 154 #ifdef CONFIG_HUGETLB_PAGE 155 155 #define HPAGE_SHIFT PMD_SHIFT /* fixed for transparent huge pages */
+14
arch/parisc/include/asm/sparsemem.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef ASM_PARISC_SPARSEMEM_H 3 + #define ASM_PARISC_SPARSEMEM_H 4 + 5 + /* We have these possible memory map layouts: 6 + * Astro: 0-3.75, 67.75-68, 4-64 7 + * zx1: 0-1, 257-260, 4-256 8 + * Stretch (N-class): 0-2, 4-32, 34-xxx 9 + */ 10 + 11 + #define MAX_PHYSMEM_BITS 39 /* 512 GB */ 12 + #define SECTION_SIZE_BITS 27 /* 128 MB */ 13 + 14 + #endif
-6
arch/parisc/kernel/parisc_ksyms.c
··· 138 138 EXPORT_SYMBOL($$dyncall); 139 139 #endif 140 140 141 - #ifdef CONFIG_DISCONTIGMEM 142 - #include <asm/mmzone.h> 143 - EXPORT_SYMBOL(node_data); 144 - EXPORT_SYMBOL(pfnnid_map); 145 - #endif 146 - 147 141 #ifdef CONFIG_FUNCTION_TRACER 148 142 extern void _mcount(void); 149 143 EXPORT_SYMBOL(_mcount);
+47 -55
arch/parisc/mm/init.c
··· 32 32 #include <asm/mmzone.h> 33 33 #include <asm/sections.h> 34 34 #include <asm/msgbuf.h> 35 + #include <asm/sparsemem.h> 35 36 36 37 extern int data_start; 37 38 extern void parisc_kernel_start(void); /* Kernel entry point in head.S */ ··· 48 47 49 48 pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((__section__ (".data..vm0.pgd"), aligned(PAGE_SIZE))); 50 49 pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __attribute__ ((__section__ (".data..vm0.pte"), aligned(PAGE_SIZE))); 51 - 52 - #ifdef CONFIG_DISCONTIGMEM 53 - struct node_map_data node_data[MAX_NUMNODES] __read_mostly; 54 - signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly; 55 - #endif 56 50 57 51 static struct resource data_resource = { 58 52 .name = "Kernel data", ··· 72 76 * information retrieved in kernel/inventory.c. 73 77 */ 74 78 75 - physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly; 76 - int npmem_ranges __read_mostly; 79 + physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __initdata; 80 + int npmem_ranges __initdata; 77 81 78 82 #ifdef CONFIG_64BIT 79 - #define MAX_MEM (~0UL) 83 + #define MAX_MEM (1UL << MAX_PHYSMEM_BITS) 80 84 #else /* !CONFIG_64BIT */ 81 85 #define MAX_MEM (3584U*1024U*1024U) 82 86 #endif /* !CONFIG_64BIT */ ··· 115 119 static void __init setup_bootmem(void) 116 120 { 117 121 unsigned long mem_max; 118 - #ifndef CONFIG_DISCONTIGMEM 122 + #ifndef CONFIG_SPARSEMEM 119 123 physmem_range_t pmem_holes[MAX_PHYSMEM_RANGES - 1]; 120 124 int npmem_holes; 121 125 #endif ··· 133 137 int j; 134 138 135 139 for (j = i; j > 0; j--) { 136 - unsigned long tmp; 140 + physmem_range_t tmp; 137 141 138 142 if (pmem_ranges[j-1].start_pfn < 139 143 pmem_ranges[j].start_pfn) { 140 144 141 145 break; 142 146 } 143 - tmp = pmem_ranges[j-1].start_pfn; 144 - pmem_ranges[j-1].start_pfn = pmem_ranges[j].start_pfn; 145 - pmem_ranges[j].start_pfn = tmp; 146 - tmp = pmem_ranges[j-1].pages; 147 - pmem_ranges[j-1].pages = pmem_ranges[j].pages; 148 - pmem_ranges[j].pages = tmp; 147 + tmp = 
pmem_ranges[j-1]; 148 + pmem_ranges[j-1] = pmem_ranges[j]; 149 + pmem_ranges[j] = tmp; 149 150 } 150 151 } 151 152 152 - #ifndef CONFIG_DISCONTIGMEM 153 + #ifndef CONFIG_SPARSEMEM 153 154 /* 154 155 * Throw out ranges that are too far apart (controlled by 155 156 * MAX_GAP). ··· 158 165 pmem_ranges[i-1].pages) > MAX_GAP) { 159 166 npmem_ranges = i; 160 167 printk("Large gap in memory detected (%ld pages). " 161 - "Consider turning on CONFIG_DISCONTIGMEM\n", 168 + "Consider turning on CONFIG_SPARSEMEM\n", 162 169 pmem_ranges[i].start_pfn - 163 170 (pmem_ranges[i-1].start_pfn + 164 171 pmem_ranges[i-1].pages)); ··· 223 230 224 231 printk(KERN_INFO "Total Memory: %ld MB\n",mem_max >> 20); 225 232 226 - #ifndef CONFIG_DISCONTIGMEM 233 + #ifndef CONFIG_SPARSEMEM 227 234 /* Merge the ranges, keeping track of the holes */ 228 - 229 235 { 230 236 unsigned long end_pfn; 231 237 unsigned long hole_pages; ··· 244 252 245 253 pmem_ranges[0].pages = end_pfn - pmem_ranges[0].start_pfn; 246 254 npmem_ranges = 1; 247 - } 248 - #endif 249 - 250 - #ifdef CONFIG_DISCONTIGMEM 251 - for (i = 0; i < MAX_PHYSMEM_RANGES; i++) { 252 - memset(NODE_DATA(i), 0, sizeof(pg_data_t)); 253 - } 254 - memset(pfnnid_map, 0xff, sizeof(pfnnid_map)); 255 - 256 - for (i = 0; i < npmem_ranges; i++) { 257 - node_set_state(i, N_NORMAL_MEMORY); 258 - node_set_online(i); 259 255 } 260 256 #endif 261 257 ··· 294 314 memblock_reserve(__pa(KERNEL_BINARY_TEXT_START), 295 315 (unsigned long)(_end - KERNEL_BINARY_TEXT_START)); 296 316 297 - #ifndef CONFIG_DISCONTIGMEM 317 + #ifndef CONFIG_SPARSEMEM 298 318 299 319 /* reserve the holes */ 300 320 ··· 340 360 341 361 /* Initialize Page Deallocation Table (PDT) and check for bad memory. 
*/ 342 362 pdc_pdt_init(); 363 + 364 + memblock_allow_resize(); 365 + memblock_dump_all(); 343 366 } 344 367 345 368 static int __init parisc_text_address(unsigned long vaddr) ··· 696 713 PAGE_SIZE, PAGE_GATEWAY, 1); 697 714 } 698 715 699 - void __init paging_init(void) 716 + static void __init parisc_bootmem_free(void) 700 717 { 718 + unsigned long zones_size[MAX_NR_ZONES] = { 0, }; 719 + unsigned long holes_size[MAX_NR_ZONES] = { 0, }; 720 + unsigned long mem_start_pfn = ~0UL, mem_end_pfn = 0, mem_size_pfn = 0; 701 721 int i; 702 722 723 + for (i = 0; i < npmem_ranges; i++) { 724 + unsigned long start = pmem_ranges[i].start_pfn; 725 + unsigned long size = pmem_ranges[i].pages; 726 + unsigned long end = start + size; 727 + 728 + if (mem_start_pfn > start) 729 + mem_start_pfn = start; 730 + if (mem_end_pfn < end) 731 + mem_end_pfn = end; 732 + mem_size_pfn += size; 733 + } 734 + 735 + zones_size[0] = mem_end_pfn - mem_start_pfn; 736 + holes_size[0] = zones_size[0] - mem_size_pfn; 737 + 738 + free_area_init_node(0, zones_size, mem_start_pfn, holes_size); 739 + } 740 + 741 + void __init paging_init(void) 742 + { 703 743 setup_bootmem(); 704 744 pagetable_init(); 705 745 gateway_init(); 706 746 flush_cache_all_local(); /* start with known state */ 707 747 flush_tlb_all_local(NULL); 708 748 709 - for (i = 0; i < npmem_ranges; i++) { 710 - unsigned long zones_size[MAX_NR_ZONES] = { 0, }; 711 - 712 - zones_size[ZONE_NORMAL] = pmem_ranges[i].pages; 713 - 714 - #ifdef CONFIG_DISCONTIGMEM 715 - /* Need to initialize the pfnnid_map before we can initialize 716 - the zone */ 717 - { 718 - int j; 719 - for (j = (pmem_ranges[i].start_pfn >> PFNNID_SHIFT); 720 - j <= ((pmem_ranges[i].start_pfn + pmem_ranges[i].pages) >> PFNNID_SHIFT); 721 - j++) { 722 - pfnnid_map[j] = i; 723 - } 724 - } 725 - #endif 726 - 727 - free_area_init_node(i, zones_size, 728 - pmem_ranges[i].start_pfn, NULL); 729 - } 749 + /* 750 + * Mark all memblocks as present for sparsemem using 751 + * 
memory_present() and then initialize sparsemem. 752 + */ 753 + memblocks_present(); 754 + sparse_init(); 755 + parisc_bootmem_free(); 730 756 } 731 757 732 758 #ifdef CONFIG_PA20