Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
  percpu: make allocation failures more verbose
  percpu: make pcpu_setup_first_chunk() failures more verbose
  percpu: make embedding first chunk allocator check vmalloc space size
  sparc64: implement page mapping percpu first chunk allocator
  percpu: make pcpu_build_alloc_info() clear static buffers
  percpu: fix unit_map[] verification in pcpu_setup_first_chunk()

3 files changed, 113 insertions(+), 24 deletions(-)
arch/sparc/Kconfig (+3 -0)
···
 config NEED_PER_CPU_EMBED_FIRST_CHUNK
         def_bool y if SPARC64

+config NEED_PER_CPU_PAGE_FIRST_CHUNK
+        def_bool y if SPARC64
+
 config GENERIC_HARDIRQS_NO__DO_IRQ
         bool
         def_bool y if SPARC64
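
Note (not part of the diff): selecting NEED_PER_CPU_PAGE_FIRST_CHUNK makes the generic page-mapping first chunk allocator available to the architecture. Its entry point, which the sparc64 code below calls, is declared in include/linux/percpu.h roughly as follows; the guard and typedef names are reconstructed from memory and may differ slightly:

#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
/* build the first percpu chunk page by page and map it in vmalloc space */
extern int __init pcpu_page_first_chunk(size_t reserved_size,
                                        pcpu_fc_alloc_fn_t alloc_fn,
                                        pcpu_fc_free_fn_t free_fn,
                                        pcpu_fc_populate_pte_fn_t populate_pte_fn);
#endif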
arch/sparc/kernel/smp_64.c (+43 -8)
···
         free_bootmem(__pa(ptr), size);
 }

-static int pcpu_cpu_distance(unsigned int from, unsigned int to)
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 {
         if (cpu_to_node(from) == cpu_to_node(to))
                 return LOCAL_DISTANCE;
···
                 return REMOTE_DISTANCE;
 }

+static void __init pcpu_populate_pte(unsigned long addr)
+{
+        pgd_t *pgd = pgd_offset_k(addr);
+        pud_t *pud;
+        pmd_t *pmd;
+
+        pud = pud_offset(pgd, addr);
+        if (pud_none(*pud)) {
+                pmd_t *new;
+
+                new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+                pud_populate(&init_mm, pud, new);
+        }
+
+        pmd = pmd_offset(pud, addr);
+        if (!pmd_present(*pmd)) {
+                pte_t *new;
+
+                new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
+                pmd_populate_kernel(&init_mm, pmd, new);
+        }
+}
+
 void __init setup_per_cpu_areas(void)
 {
         unsigned long delta;
         unsigned int cpu;
-        int rc;
+        int rc = -EINVAL;

-        rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
-                                    PERCPU_DYNAMIC_RESERVE, 4 << 20,
-                                    pcpu_cpu_distance, pcpu_alloc_bootmem,
-                                    pcpu_free_bootmem);
-        if (rc)
-                panic("failed to initialize first chunk (%d)", rc);
+        if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+                rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+                                            PERCPU_DYNAMIC_RESERVE, 4 << 20,
+                                            pcpu_cpu_distance,
+                                            pcpu_alloc_bootmem,
+                                            pcpu_free_bootmem);
+                if (rc)
+                        pr_warning("PERCPU: %s allocator failed (%d), "
+                                   "falling back to page size\n",
+                                   pcpu_fc_names[pcpu_chosen_fc], rc);
+        }
+        if (rc < 0)
+                rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
+                                           pcpu_alloc_bootmem,
+                                           pcpu_free_bootmem,
+                                           pcpu_populate_pte);
+        if (rc < 0)
+                panic("cannot initialize percpu area (err=%d)", rc);

         delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
         for_each_possible_cpu(cpu)
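
Note (not part of the diff): pcpu_chosen_fc and pcpu_fc_names come from mm/percpu.c, where the first chunk allocator can be forced from the kernel command line. A simplified sketch of that selection logic, with the parameter name and guards paraphrased from memory:

/* sketch of the allocator selection in mm/percpu.c (simplified) */
enum pcpu_fc pcpu_chosen_fc __initdata = PCPU_FC_AUTO;

static int __init percpu_alloc_setup(char *str)
{
        if (!strcmp(str, "embed"))
                pcpu_chosen_fc = PCPU_FC_EMBED;  /* needs NEED_PER_CPU_EMBED_FIRST_CHUNK */
        else if (!strcmp(str, "page"))
                pcpu_chosen_fc = PCPU_FC_PAGE;   /* needs NEED_PER_CPU_PAGE_FIRST_CHUNK */
        else
                pr_warning("PERCPU: unknown allocator %s specified\n", str);
        return 0;
}
early_param("percpu_alloc", percpu_alloc_setup);

With percpu_alloc=page on the command line, setup_per_cpu_areas() above skips the embed attempt entirely; otherwise it tries the embed allocator first and falls back to the page allocator only if that fails.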
mm/percpu.c (+67 -16)
···
  */
 static void *pcpu_alloc(size_t size, size_t align, bool reserved)
 {
+        static int warn_limit = 10;
         struct pcpu_chunk *chunk;
+        const char *err;
         int slot, off;

         if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
···
         if (reserved && pcpu_reserved_chunk) {
                 chunk = pcpu_reserved_chunk;
                 if (size > chunk->contig_hint ||
-                    pcpu_extend_area_map(chunk) < 0)
+                    pcpu_extend_area_map(chunk) < 0) {
+                        err = "failed to extend area map of reserved chunk";
                         goto fail_unlock;
+                }
                 off = pcpu_alloc_area(chunk, size, align);
                 if (off >= 0)
                         goto area_found;
+                err = "alloc from reserved chunk failed";
                 goto fail_unlock;
         }

···
         case 1:
                 goto restart;           /* pcpu_lock dropped, restart */
         default:
+                err = "failed to extend area map";
                 goto fail_unlock;
         }

···
         spin_unlock_irq(&pcpu_lock);

         chunk = alloc_pcpu_chunk();
-        if (!chunk)
+        if (!chunk) {
+                err = "failed to allocate new chunk";
                 goto fail_unlock_mutex;
+        }

         spin_lock_irq(&pcpu_lock);
         pcpu_chunk_relocate(chunk, -1);
···
         if (pcpu_populate_chunk(chunk, off, size)) {
                 spin_lock_irq(&pcpu_lock);
                 pcpu_free_area(chunk, off);
+                err = "failed to populate";
                 goto fail_unlock;
         }

···
         spin_unlock_irq(&pcpu_lock);
 fail_unlock_mutex:
         mutex_unlock(&pcpu_alloc_mutex);
+        if (warn_limit) {
+                pr_warning("PERCPU: allocation failed, size=%zu align=%zu, "
+                           "%s\n", size, align, err);
+                dump_stack();
+                if (!--warn_limit)
+                        pr_info("PERCPU: limit reached, disable warning\n");
+        }
         return NULL;
 }
···
         struct pcpu_alloc_info *ai;
         unsigned int *cpu_map;

+        /* this function may be called multiple times */
+        memset(group_map, 0, sizeof(group_map));
+        memset(group_cnt, 0, sizeof(group_map));
+
         /*
          * Determine min_unit_size, alloc_size and max_upa such that
          * alloc_size is multiple of atom_size and is the smallest
···
 int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
                                   void *base_addr)
 {
+        static char cpus_buf[4096] __initdata;
         static int smap[2], dmap[2];
         size_t dyn_size = ai->dyn_size;
         size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
···
         int *unit_map;
         int group, unit, i;

+        cpumask_scnprintf(cpus_buf, sizeof(cpus_buf), cpu_possible_mask);
+
+#define PCPU_SETUP_BUG_ON(cond) do {                                    \
+        if (unlikely(cond)) {                                           \
+                pr_emerg("PERCPU: failed to initialize, %s", #cond);    \
+                pr_emerg("PERCPU: cpu_possible_mask=%s\n", cpus_buf);   \
+                pcpu_dump_alloc_info(KERN_EMERG, ai);                   \
+                BUG();                                                  \
+        }                                                               \
+} while (0)
+
         /* sanity checks */
         BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
                      ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
-        BUG_ON(ai->nr_groups <= 0);
-        BUG_ON(!ai->static_size);
-        BUG_ON(!base_addr);
-        BUG_ON(ai->unit_size < size_sum);
-        BUG_ON(ai->unit_size & ~PAGE_MASK);
-        BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
-
-        pcpu_dump_alloc_info(KERN_DEBUG, ai);
+        PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
+        PCPU_SETUP_BUG_ON(!ai->static_size);
+        PCPU_SETUP_BUG_ON(!base_addr);
+        PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
+        PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
+        PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);

         /* process group information and build config tables accordingly */
         group_offsets = alloc_bootmem(ai->nr_groups * sizeof(group_offsets[0]));
···
         unit_off = alloc_bootmem(nr_cpu_ids * sizeof(unit_off[0]));

         for (cpu = 0; cpu < nr_cpu_ids; cpu++)
-                unit_map[cpu] = NR_CPUS;
+                unit_map[cpu] = UINT_MAX;
         pcpu_first_unit_cpu = NR_CPUS;

         for (group = 0, unit = 0; group < ai->nr_groups; group++, unit += i) {
···
                 if (cpu == NR_CPUS)
                         continue;

-                BUG_ON(cpu > nr_cpu_ids || !cpu_possible(cpu));
-                BUG_ON(unit_map[cpu] != NR_CPUS);
+                PCPU_SETUP_BUG_ON(cpu > nr_cpu_ids);
+                PCPU_SETUP_BUG_ON(!cpu_possible(cpu));
+                PCPU_SETUP_BUG_ON(unit_map[cpu] != UINT_MAX);

                 unit_map[cpu] = unit + i;
                 unit_off[cpu] = gi->base_offset + i * ai->unit_size;
···
         pcpu_nr_units = unit;

         for_each_possible_cpu(cpu)
-                BUG_ON(unit_map[cpu] == NR_CPUS);
+                PCPU_SETUP_BUG_ON(unit_map[cpu] == UINT_MAX);
+
+        /* we're done parsing the input, undefine BUG macro and dump config */
+#undef PCPU_SETUP_BUG_ON
+        pcpu_dump_alloc_info(KERN_INFO, ai);

         pcpu_nr_groups = ai->nr_groups;
         pcpu_group_offsets = group_offsets;
···
         void *base = (void *)ULONG_MAX;
         void **areas = NULL;
         struct pcpu_alloc_info *ai;
-        size_t size_sum, areas_size;
+        size_t size_sum, areas_size, max_distance;
         int group, i, rc;

         ai = pcpu_build_alloc_info(reserved_size, dyn_size, atom_size,
···
         }

         /* base address is now known, determine group base offsets */
-        for (group = 0; group < ai->nr_groups; group++)
+        max_distance = 0;
+        for (group = 0; group < ai->nr_groups; group++) {
                 ai->groups[group].base_offset = areas[group] - base;
+                max_distance = max(max_distance, ai->groups[group].base_offset);
+        }
+        max_distance += ai->unit_size;
+
+        /* warn if maximum distance is further than 75% of vmalloc space */
+        if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) {
+                pr_warning("PERCPU: max_distance=0x%lx too large for vmalloc "
+                           "space 0x%lx\n",
+                           max_distance, VMALLOC_END - VMALLOC_START);
+#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
+                /* and fail if we have fallback */
+                rc = -EINVAL;
+                goto out_free;
+#endif
+        }

         pr_info("PERCPU: Embedded %zu pages/cpu @%p s%zu r%zu d%zu u%zu\n",
                 PFN_DOWN(size_sum), base, ai->static_size, ai->reserved_size,
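
Note (not part of the diff): the 75% check above guards vmalloc address space. With the embed allocator, every later dynamically allocated chunk is laid out in vmalloc space with the same group offsets as the first chunk, so on NUMA machines a large spread between group base addresses makes each chunk consume a correspondingly large slice of vmalloc space. A minimal sketch of the predicate being enforced, using a hypothetical helper that is not in the patch:

/* hypothetical helper illustrating the check added above */
static bool __init pcpu_embed_distance_ok(unsigned long max_distance)
{
        unsigned long vmalloc_size = VMALLOC_END - VMALLOC_START;

        /*
         * max_distance is the largest group base_offset plus unit_size;
         * past 75% of vmalloc space the embed layout is considered too
         * sparse, and -EINVAL is returned when a page-mapping fallback
         * (CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK) is available.
         */
        return max_distance <= vmalloc_size * 3 / 4;
}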