Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc

Pull sparc fixes from David Miller:

1) Fix section mismatches in some builds, from Paul Gortmaker.

2) Need to count huge zero page mappings when doing TSB sizing, from
Mike Kravetz.

3) Fix handling of cpu_possible_mask when the nr_cpus command line option
is specified, from Atish Patra.

4) Don't allocate irq stacks until nr_cpus has been processed and the
possible cpu map is known, also from Atish Patra.

* git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc:
sparc64: Fix non-SMP build.
sparc64: Fix irq stack bootmem allocation.
sparc64: Fix cpu_possible_mask if nr_cpus is set
sparc64 mm: Fix more TSB sizing issues
sparc64: fix section mismatch in find_numa_latencies_for_group

Changed files: +90 -29

arch/sparc/include/asm/page_64.h (+1)
···
 #define HPAGE_MASK		(~(HPAGE_SIZE - 1UL))
 #define HUGETLB_PAGE_ORDER	(HPAGE_SHIFT - PAGE_SHIFT)
 #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#define REAL_HPAGE_PER_HPAGE	(_AC(1,UL) << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))
 #endif

 #ifndef __ASSEMBLY__
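For orientation (not part of the patch): with the usual sparc64 constants, HPAGE_SHIFT is 23 (8MB huge pages) and REAL_HPAGE_SHIFT is 22 (the 4MB pages the TLB and TSB actually operate on), so the new macro evaluates to 2. A minimal compilable sketch of that arithmetic, with the shift values assumed rather than taken from this diff and _AC() dropped for a user-space build:

#include <assert.h>

/* Assumed sparc64 values; only the REAL_HPAGE_PER_HPAGE expression mirrors the patch. */
#define HPAGE_SHIFT		23	/* 8MB HPAGE */
#define REAL_HPAGE_SHIFT	22	/* 4MB REAL_HPAGE */
#define REAL_HPAGE_PER_HPAGE	(1UL << (HPAGE_SHIFT - REAL_HPAGE_SHIFT))

int main(void)
{
	/* Each 8MB huge page is mapped by two 4MB TTEs, which is why the
	 * TSB sizing changes below scale huge-page counts by this factor.
	 */
	assert(REAL_HPAGE_PER_HPAGE == 2);
	return 0;
}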

arch/sparc/include/asm/smp_64.h (+2)
···
 int hard_smp_processor_id(void);
 #define raw_smp_processor_id() (current_thread_info()->cpu)

+void smp_fill_in_cpu_possible_map(void);
 void smp_fill_in_sib_core_maps(void);
 void cpu_play_dead(void);

···
 #define smp_fill_in_sib_core_maps() do { } while (0)
 #define smp_fetch_global_regs() do { } while (0)
 #define smp_fetch_global_pmu() do { } while (0)
+#define smp_fill_in_cpu_possible_map() do { } while (0)

 #endif /* !(CONFIG_SMP) */

arch/sparc/kernel/setup_64.c (+26)
···
 #include <linux/initrd.h>
 #include <linux/module.h>
 #include <linux/start_kernel.h>
+#include <linux/bootmem.h>

 #include <asm/io.h>
 #include <asm/processor.h>

···
 #include <asm/elf.h>
 #include <asm/mdesc.h>
 #include <asm/cacheflush.h>
+#include <asm/dma.h>
+#include <asm/irq.h>

 #ifdef CONFIG_IP_PNP
 #include <net/ipconfig.h>

···
 		pause_patch();
 }

+void __init alloc_irqstack_bootmem(void)
+{
+	unsigned int i, node;
+
+	for_each_possible_cpu(i) {
+		node = cpu_to_node(i);
+
+		softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+							THREAD_SIZE,
+							THREAD_SIZE, 0);
+		hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
+							THREAD_SIZE,
+							THREAD_SIZE, 0);
+	}
+}
+
 void __init setup_arch(char **cmdline_p)
 {
 	/* Initialize PROM console and command line. */

···
 	paging_init();
 	init_sparc64_elf_hwcap();
+	smp_fill_in_cpu_possible_map();
+	/*
+	 * Once the OF device tree and MDESC have been setup and nr_cpus has
+	 * been parsed, we know the list of possible cpus. Therefore we can
+	 * allocate the IRQ stacks.
+	 */
+	alloc_irqstack_bootmem();
 }

 extern int stop_a_enabled;

arch/sparc/kernel/smp_64.c (+14)
···
 		xcall_deliver_impl = hypervisor_xcall_deliver;
 }

+void __init smp_fill_in_cpu_possible_map(void)
+{
+	int possible_cpus = num_possible_cpus();
+	int i;
+
+	if (possible_cpus > nr_cpu_ids)
+		possible_cpus = nr_cpu_ids;
+
+	for (i = 0; i < possible_cpus; i++)
+		set_cpu_possible(i, true);
+	for (; i < NR_CPUS; i++)
+		set_cpu_possible(i, false);
+}
+
 void smp_fill_in_sib_core_maps(void)
 {
 	unsigned int i;
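As an illustration of what the new function buys (a user-space sketch with hypothetical names, not kernel code): if the machine description enumerates 64 CPUs but the kernel is booted with nr_cpus=8, nr_cpu_ids ends up as 8, so only CPUs 0-7 stay in cpu_possible_mask and the IRQ-stack loop added to setup_arch() above allocates 8 stack pairs instead of 64.

#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 64	/* hypothetical build-time maximum */

/* Mirrors the clamp-and-fill logic of smp_fill_in_cpu_possible_map(),
 * but on a plain bool array instead of the kernel's cpu_possible_mask.
 */
static void fill_in_cpu_possible_map(bool *possible, int enumerated, int nr_cpu_ids)
{
	int possible_cpus = enumerated;
	int i;

	if (possible_cpus > nr_cpu_ids)
		possible_cpus = nr_cpu_ids;

	for (i = 0; i < possible_cpus; i++)
		possible[i] = true;
	for (; i < NR_CPUS; i++)
		possible[i] = false;
}

int main(void)
{
	bool possible[NR_CPUS];
	int i, count = 0;

	/* Firmware (MDESC) reported 64 CPUs, but the kernel was booted
	 * with nr_cpus=8, so nr_cpu_ids is 8.
	 */
	fill_in_cpu_possible_map(possible, 64, 8);

	for (i = 0; i < NR_CPUS; i++)
		count += possible[i];
	printf("possible cpus: %d\n", count);	/* prints 8 */
	return 0;
}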

arch/sparc/mm/fault_64.c (+1)
···
 		tsb_grow(mm, MM_TSB_BASE, mm_rss);
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
 	mm_rss = mm->context.hugetlb_pte_count + mm->context.thp_pte_count;
+	mm_rss *= REAL_HPAGE_PER_HPAGE;
 	if (unlikely(mm_rss >
 		     mm->context.tsb_block[MM_TSB_HUGE].tsb_rss_limit)) {
 		if (mm->context.tsb_block[MM_TSB_HUGE].tsb)
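To make the rescaling concrete (made-up counts, and REAL_HPAGE_PER_HPAGE assumed to be 2): an address space with 96 hugetlb pages and 32 THP pages needs the huge TSB sized for 128 * 2 = 256 entries, since the TLB miss handlers insert two 4MB TTEs per 8MB page; before this change the comparison against tsb_rss_limit used only 128.

#include <assert.h>

#define REAL_HPAGE_PER_HPAGE	2UL	/* assumed ratio of 4MB TTEs per 8MB HPAGE */

int main(void)
{
	unsigned long hugetlb_pte_count = 96;	/* made-up example counts */
	unsigned long thp_pte_count = 32;
	unsigned long mm_rss;

	mm_rss = hugetlb_pte_count + thp_pte_count;	/* 128 huge pages mapped */
	mm_rss *= REAL_HPAGE_PER_HPAGE;			/* 256 TSB entries actually needed */

	assert(mm_rss == 256);
	return 0;
}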

arch/sparc/mm/init_64.c (+3 -19)
···
 	return numa_latency[from][to];
 }

-static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
+static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
 {
 	int i;

···
 	return i;
 }

-static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp,
-					  int index)
+static void __init find_numa_latencies_for_group(struct mdesc_handle *md,
+						 u64 grp, int index)
 {
 	u64 arc;

···
 {
 	unsigned long end_pfn, shift, phys_base;
 	unsigned long real_end, i;
-	int node;

 	setup_page_offset();

···
 	/* Setup bootmem... */
 	last_valid_pfn = end_pfn = bootmem_init(phys_base);
-
-	/* Once the OF device tree and MDESC have been setup, we know
-	 * the list of possible cpus. Therefore we can allocate the
-	 * IRQ stacks.
-	 */
-	for_each_possible_cpu(i) {
-		node = cpu_to_node(i);
-
-		softirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
-							THREAD_SIZE,
-							THREAD_SIZE, 0);
-		hardirq_stack[i] = __alloc_bootmem_node(NODE_DATA(node),
-							THREAD_SIZE,
-							THREAD_SIZE, 0);
-	}

 	kernel_physical_mapping_init();

arch/sparc/mm/tlb.c (+31 -4)
···
 		return;

 	if ((pmd_val(pmd) ^ pmd_val(orig)) & _PAGE_PMD_HUGE) {
-		if (pmd_val(pmd) & _PAGE_PMD_HUGE)
-			mm->context.thp_pte_count++;
-		else
-			mm->context.thp_pte_count--;
+		/*
+		 * Note that this routine only sets pmds for THP pages.
+		 * Hugetlb pages are handled elsewhere. We need to check
+		 * for huge zero page. Huge zero pages are like hugetlb
+		 * pages in that there is no RSS, but there is the need
+		 * for TSB entries. So, huge zero page counts go into
+		 * hugetlb_pte_count.
+		 */
+		if (pmd_val(pmd) & _PAGE_PMD_HUGE) {
+			if (is_huge_zero_page(pmd_page(pmd)))
+				mm->context.hugetlb_pte_count++;
+			else
+				mm->context.thp_pte_count++;
+		} else {
+			if (is_huge_zero_page(pmd_page(orig)))
+				mm->context.hugetlb_pte_count--;
+			else
+				mm->context.thp_pte_count--;
+		}

 		/* Do not try to allocate the TSB hash table if we
 		 * don't have one already. We have various locks held

···
 	}
 }

+/*
+ * This routine is only called when splitting a THP
+ */
 void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pmd_t *pmdp)
 {

···
 	set_pmd_at(vma->vm_mm, address, pmdp, entry);
 	flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
+
+	/*
+	 * set_pmd_at() will not be called in a way to decrement
+	 * thp_pte_count when splitting a THP, so do it now.
+	 * Sanity check pmd before doing the actual decrement.
+	 */
+	if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
+	    !is_huge_zero_page(pmd_page(entry)))
+		(vma->vm_mm)->context.thp_pte_count--;
 }

 void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,

arch/sparc/mm/tsb.c (+12 -6)
···

 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
+	unsigned long mm_rss = get_mm_rss(mm);
 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-	unsigned long total_huge_pte_count;
+	unsigned long saved_hugetlb_pte_count;
+	unsigned long saved_thp_pte_count;
 #endif
 	unsigned int i;

···
 	 * will re-increment the counters as the parent PTEs are
 	 * copied into the child address space.
 	 */
-	total_huge_pte_count = mm->context.hugetlb_pte_count +
-			 mm->context.thp_pte_count;
+	saved_hugetlb_pte_count = mm->context.hugetlb_pte_count;
+	saved_thp_pte_count = mm->context.thp_pte_count;
 	mm->context.hugetlb_pte_count = 0;
 	mm->context.thp_pte_count = 0;
+
+	mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);
 #endif

···
 	/* If this is fork, inherit the parent's TSB size. We would
 	 * grow it to that size on the first page fault anyways.
 	 */
-	tsb_grow(mm, MM_TSB_BASE, get_mm_rss(mm));
+	tsb_grow(mm, MM_TSB_BASE, mm_rss);

 #if defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE)
-	if (unlikely(total_huge_pte_count))
-		tsb_grow(mm, MM_TSB_HUGE, total_huge_pte_count);
+	if (unlikely(saved_hugetlb_pte_count + saved_thp_pte_count))
+		tsb_grow(mm, MM_TSB_HUGE,
+			 (saved_hugetlb_pte_count + saved_thp_pte_count) *
+			 REAL_HPAGE_PER_HPAGE);
 #endif

 	if (unlikely(!mm->context.tsb_block[MM_TSB_BASE].tsb))
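The fork-time bookkeeping is worth spelling out. THP mappings are counted in the parent's regular RSS (HPAGE_SIZE / PAGE_SIZE base pages each) yet are serviced only by the huge TSB, so they are subtracted from the base-TSB estimate, while the huge TSB is sized from the saved counts scaled by REAL_HPAGE_PER_HPAGE. A compilable sketch with made-up numbers, assuming sparc64's 8KB base page, 8MB huge page and a ratio of 2:

#include <assert.h>

#define PAGE_SIZE		(8UL * 1024)		/* assumed 8KB base page */
#define HPAGE_SIZE		(8UL * 1024 * 1024)	/* assumed 8MB huge page */
#define REAL_HPAGE_PER_HPAGE	2UL			/* assumed two 4MB TTEs per HPAGE */

int main(void)
{
	unsigned long mm_rss = 5000;			/* made-up parent RSS in base pages */
	unsigned long saved_hugetlb_pte_count = 10;	/* made-up counts */
	unsigned long saved_thp_pte_count = 4;
	unsigned long huge_rss;

	/* THP pages live in the regular RSS but are looked up via the huge
	 * TSB, so remove them from the base-TSB estimate...
	 */
	mm_rss -= saved_thp_pte_count * (HPAGE_SIZE / PAGE_SIZE);	/* 5000 - 4096 = 904 */

	/* ...and size the huge TSB for every 4MB entry the miss handlers
	 * will actually insert.
	 */
	huge_rss = (saved_hugetlb_pte_count + saved_thp_pte_count) *
		   REAL_HPAGE_PER_HPAGE;				/* 14 * 2 = 28 */

	assert(mm_rss == 904);
	assert(huge_rss == 28);
	return 0;
}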