Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu: use the right flag for get_vm_area()
  percpu, sparc64: fix sparse possible cpu map handling
  init: set nr_cpu_ids before setup_per_cpu_areas()
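
The sparse possible cpu map fix above turns on the difference between
num_possible_cpus() (a population count of the possible map) and nr_cpu_ids
(the highest possible CPU id plus one). The following is a minimal userspace
sketch, not part of the merge, using a made-up possible map { 0, 2, 5 } and
mock stand-ins for num_possible_cpus() and the nr_cpu_ids variable (written
as a function here for simplicity), to show why anything indexed by CPU id
has to be sized by nr_cpu_ids:

#include <stdio.h>

/* mock possible map: CPUs 0, 2 and 5 are possible, 1, 3 and 4 are not */
static const int cpu_possible_map[] = { 1, 0, 1, 0, 0, 1 };
#define NR_CPUS ((int)(sizeof(cpu_possible_map) / sizeof(cpu_possible_map[0])))

static int num_possible_cpus(void)      /* population count -> 3 */
{
        int cpu, n = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                n += cpu_possible_map[cpu];
        return n;
}

static int nr_cpu_ids(void)             /* highest possible id + 1 -> 6 */
{
        int cpu, highest = -1;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (cpu_possible_map[cpu])
                        highest = cpu;
        return highest + 1;
}

int main(void)
{
        printf("num_possible_cpus() = %d, nr_cpu_ids = %d\n",
               num_possible_cpus(), nr_cpu_ids());
        return 0;
}

Arrays such as pcpur_ptrs[] and pcpul_map[] in the diffs below are indexed by
CPU id inside for_each_possible_cpu() loops, so sizing them with
num_possible_cpus() under-allocates whenever the map is sparse; hence the
conversions to nr_cpu_ids, and the init/main.c reordering so that nr_cpu_ids
is set up before setup_per_cpu_areas() needs it.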

4 files changed, 29 insertions(+), 26 deletions(-)
arch/sparc/kernel/smp_64.c (+2 -2)

--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1499,7 +1499,7 @@
         dyn_size = pcpur_size - static_size - PERCPU_MODULE_RESERVE;
 
 
-        ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0]));
+        ptrs_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpur_ptrs[0]));
         pcpur_ptrs = alloc_bootmem(ptrs_size);
 
         for_each_possible_cpu(cpu) {
@@ -1514,7 +1514,7 @@
 
         /* allocate address and map */
         vm.flags = VM_ALLOC;
-        vm.size = num_possible_cpus() * PCPU_CHUNK_SIZE;
+        vm.size = nr_cpu_ids * PCPU_CHUNK_SIZE;
         vm_area_register_early(&vm, PCPU_CHUNK_SIZE);
 
         for_each_possible_cpu(cpu) {
arch/x86/kernel/setup_percpu.c (+7 -7)

--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -165,7 +165,7 @@
 
         if (!chosen) {
                 size_t vm_size = VMALLOC_END - VMALLOC_START;
-                size_t tot_size = num_possible_cpus() * PMD_SIZE;
+                size_t tot_size = nr_cpu_ids * PMD_SIZE;
 
                 /* on non-NUMA, embedding is better */
                 if (!pcpu_need_numa())
@@ -199,7 +199,7 @@
         dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
 
         /* allocate pointer array and alloc large pages */
-        map_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpul_map[0]));
+        map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0]));
         pcpul_map = alloc_bootmem(map_size);
 
         for_each_possible_cpu(cpu) {
@@ -228,7 +228,7 @@
 
         /* allocate address and map */
         pcpul_vm.flags = VM_ALLOC;
-        pcpul_vm.size = num_possible_cpus() * PMD_SIZE;
+        pcpul_vm.size = nr_cpu_ids * PMD_SIZE;
         vm_area_register_early(&pcpul_vm, PMD_SIZE);
 
         for_each_possible_cpu(cpu) {
@@ -250,8 +250,8 @@
                                      PMD_SIZE, pcpul_vm.addr, NULL);
 
         /* sort pcpul_map array for pcpu_lpage_remapped() */
-        for (i = 0; i < num_possible_cpus() - 1; i++)
-                for (j = i + 1; j < num_possible_cpus(); j++)
+        for (i = 0; i < nr_cpu_ids - 1; i++)
+                for (j = i + 1; j < nr_cpu_ids; j++)
                         if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
                                 struct pcpul_ent tmp = pcpul_map[i];
                                 pcpul_map[i] = pcpul_map[j];
@@ -288,7 +288,7 @@
 {
         void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK);
         unsigned long offset = (unsigned long)kaddr & ~PMD_MASK;
-        int left = 0, right = num_possible_cpus() - 1;
+        int left = 0, right = nr_cpu_ids - 1;
         int pos;
 
         /* pcpul in use at all? */
@@ -377,7 +377,7 @@
         pcpu4k_nr_static_pages = PFN_UP(static_size);
 
         /* unaligned allocations can't be freed, round up to page size */
-        pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus()
+        pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids
                                * sizeof(pcpu4k_pages[0]));
         pcpu4k_pages = alloc_bootmem(pages_size);
 
init/main.c (+1 -1)

--- a/init/main.c
+++ b/init/main.c
@@ -584,8 +584,8 @@
         setup_arch(&command_line);
         mm_init_owner(&init_mm, &init_task);
         setup_command_line(command_line);
-        setup_per_cpu_areas();
         setup_nr_cpu_ids();
+        setup_per_cpu_areas();
         smp_prepare_boot_cpu();        /* arch-specific boot-cpu hooks */
 
         build_all_zonelists();
mm/percpu.c (+19 -16)

--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -8,12 +8,12 @@
  *
  * This is percpu allocator which can handle both static and dynamic
  * areas. Percpu areas are allocated in chunks in vmalloc area. Each
- * chunk is consisted of num_possible_cpus() units and the first chunk
- * is used for static percpu variables in the kernel image (special
- * boot time alloc/init handling necessary as these areas need to be
- * brought up before allocation services are running). Unit grows as
- * necessary and all units grow or shrink in unison. When a chunk is
- * filled up, another chunk is allocated. ie. in vmalloc area
+ * chunk is consisted of nr_cpu_ids units and the first chunk is used
+ * for static percpu variables in the kernel image (special boot time
+ * alloc/init handling necessary as these areas need to be brought up
+ * before allocation services are running). Unit grows as necessary
+ * and all units grow or shrink in unison. When a chunk is filled up,
+ * another chunk is allocated. ie. in vmalloc area
  *
  * c0 c1 c2
  * ------------------- ------------------- ------------
@@ -558,7 +558,7 @@
 static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end,
                        bool flush_tlb)
 {
-        unsigned int last = num_possible_cpus() - 1;
+        unsigned int last = nr_cpu_ids - 1;
         unsigned int cpu;
 
         /* unmap must not be done on immutable chunk */
@@ -643,7 +643,7 @@
  */
 static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end)
 {
-        unsigned int last = num_possible_cpus() - 1;
+        unsigned int last = nr_cpu_ids - 1;
         unsigned int cpu;
         int err;
 
@@ -749,7 +749,7 @@
         chunk->map[chunk->map_used++] = pcpu_unit_size;
         chunk->page = chunk->page_ar;
 
-        chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL);
+        chunk->vm = get_vm_area(pcpu_chunk_size, VM_ALLOC);
         if (!chunk->vm) {
                 free_pcpu_chunk(chunk);
                 return NULL;
@@ -1067,9 +1067,9 @@
                                   PFN_UP(size_sum));
 
         pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT;
-        pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size;
+        pcpu_chunk_size = nr_cpu_ids * pcpu_unit_size;
         pcpu_chunk_struct_size = sizeof(struct pcpu_chunk)
-                + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *);
+                + nr_cpu_ids * pcpu_unit_pages * sizeof(struct page *);
 
         if (dyn_size < 0)
                 dyn_size = pcpu_unit_size - static_size - reserved_size;
@@ -1248,7 +1248,7 @@
         } else
                 pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE);
 
-        chunk_size = pcpue_unit_size * num_possible_cpus();
+        chunk_size = pcpue_unit_size * nr_cpu_ids;
 
         pcpue_ptr = __alloc_bootmem_nopanic(chunk_size, PAGE_SIZE,
                                             __pa(MAX_DMA_ADDRESS));
@@ -1259,12 +1259,15 @@
         }
 
         /* return the leftover and copy */
-        for_each_possible_cpu(cpu) {
+        for (cpu = 0; cpu < nr_cpu_ids; cpu++) {
                 void *ptr = pcpue_ptr + cpu * pcpue_unit_size;
 
-                free_bootmem(__pa(ptr + pcpue_size),
-                             pcpue_unit_size - pcpue_size);
-                memcpy(ptr, __per_cpu_load, static_size);
+                if (cpu_possible(cpu)) {
+                        free_bootmem(__pa(ptr + pcpue_size),
+                                     pcpue_unit_size - pcpue_size);
+                        memcpy(ptr, __per_cpu_load, static_size);
+                } else
+                        free_bootmem(__pa(ptr), pcpue_unit_size);
         }
 
         /* we're ready, commit */