Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/percpu:
percpu: allow limited allocation before slab is online
percpu: make @dyn_size always mean min dyn_size in first chunk init functions

3 files changed, 64 insertions(+), 42 deletions(-)
include/linux/percpu.h (+14 -6)
···
 #define PCPU_MIN_UNIT_SIZE		PFN_ALIGN(64 << 10)
 
 /*
+ * Percpu allocator can serve percpu allocations before slab is
+ * initialized which allows slab to depend on the percpu allocator.
+ * The following two parameters decide how much resource to
+ * preallocate for this.  Keep PERCPU_DYNAMIC_RESERVE equal to or
+ * larger than PERCPU_DYNAMIC_EARLY_SIZE.
+ */
+#define PERCPU_DYNAMIC_EARLY_SLOTS	128
+#define PERCPU_DYNAMIC_EARLY_SIZE	(12 << 10)
+
+/*
  * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy
  * back on the first chunk for dynamic percpu allocation if arch is
  * manually allocating and mapping it for faster access (as a part of
···
 					     int nr_units);
 extern void __init pcpu_free_alloc_info(struct pcpu_alloc_info *ai);
 
-extern struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-				size_t reserved_size, ssize_t dyn_size,
-				size_t atom_size,
-				pcpu_fc_cpu_distance_fn_t cpu_distance_fn);
-
 extern int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
 					 void *base_addr);
 
 #ifdef CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK
-extern int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
+extern int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				size_t atom_size,
 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
 				pcpu_fc_alloc_fn_t alloc_fn,
···
 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA
 extern void __init setup_per_cpu_areas(void);
 #endif
+extern void __init percpu_init_late(void);
 
 #else /* CONFIG_SMP */
···
 }
 
 static inline void __init setup_per_cpu_areas(void) { }
+
+static inline void __init percpu_init_late(void) { }
 
 static inline void *pcpu_lpage_remapped(void *kaddr)
 {
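To see the caller-visible effect of the @dyn_size change, here is a minimal sketch of an architecture's setup_per_cpu_areas() using the embed helper under the new prototype. Only pcpu_embed_first_chunk() and PERCPU_DYNAMIC_RESERVE come from the header above; the arch_early_alloc()/arch_early_free() wrappers, the zero reserved size, and the error string are hypothetical placeholders, not code from this merge. The point is that @dyn_size is now a plain size_t minimum rather than an ssize_t where -1 meant "auto".

/* Hypothetical wrappers around an arch's early (bootmem) allocator. */
static void * __init my_pcpu_fc_alloc(unsigned int cpu, size_t size,
				      size_t align)
{
	return arch_early_alloc(size, align);	/* hypothetical helper */
}

static void __init my_pcpu_fc_free(void *ptr, size_t size)
{
	arch_early_free(ptr, size);		/* hypothetical helper */
}

void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc;

	/*
	 * @dyn_size is a minimum: the allocator may grow it to satisfy
	 * page alignment and the PERCPU_DYNAMIC_EARLY_SIZE floor, but
	 * never shrinks it.  The old -1 "auto" convention is gone.
	 */
	rc = pcpu_embed_first_chunk(0, PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
				    NULL, my_pcpu_fc_alloc, my_pcpu_fc_free);
	if (rc < 0)
		panic("cannot initialize percpu area (err=%d)", rc);

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
}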
init/main.c (+1 -0)
···
 	page_cgroup_init_flatmem();
 	mem_init();
 	kmem_cache_init();
+	percpu_init_late();
 	pgtable_cache_init();
 	vmalloc_init();
 }
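The point of this ordering is that the percpu allocator can already serve small dynamic allocations before this hook runs; percpu_init_late() merely swaps the temporary initdata allocation maps for properly allocated ones once slab is up. A hedged sketch of such an early user follows; the struct, function, and variable names are hypothetical, and the request has to fit within the PERCPU_DYNAMIC_EARLY_SIZE / PERCPU_DYNAMIC_EARLY_SLOTS budget preallocated in the first chunk.

/* Hypothetical early-boot user, for illustration only. */
struct early_stats {
	unsigned long events;
	unsigned long errors;
};

static struct early_stats __percpu *early_stats;

void __init early_stats_init(void)
{
	/* legal even before slab is fully online, within the early budget */
	early_stats = alloc_percpu(struct early_stats);
	BUG_ON(!early_stats);

	/* per-cpu accessors work as usual once the pointer is set up */
	this_cpu_inc(early_stats->events);
}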
mm/percpu.c (+49 -36)
···
  */
 static void *pcpu_mem_alloc(size_t size)
 {
+	if (WARN_ON_ONCE(!slab_is_available()))
+		return NULL;
+
 	if (size <= PAGE_SIZE)
 		return kzalloc(size, GFP_KERNEL);
 	else {
···
 	old_size = chunk->map_alloc * sizeof(chunk->map[0]);
 	memcpy(new, chunk->map, old_size);
-
-	/*
-	 * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
-	 * one of the first chunks and still using static map.
-	 */
-	if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
-		old = chunk->map;
 
 	chunk->map_alloc = new_alloc;
 	chunk->map = new;
···
 {
 	struct pcpu_chunk *chunk;
 
-	chunk = kzalloc(pcpu_chunk_struct_size, GFP_KERNEL);
+	chunk = pcpu_mem_alloc(pcpu_chunk_struct_size);
 	if (!chunk)
 		return NULL;
···
 	return page_to_phys(pcpu_addr_to_page(addr));
 }
 
-static inline size_t pcpu_calc_fc_sizes(size_t static_size,
-					size_t reserved_size,
-					ssize_t *dyn_sizep)
-{
-	size_t size_sum;
-
-	size_sum = PFN_ALIGN(static_size + reserved_size +
-			     (*dyn_sizep >= 0 ? *dyn_sizep : 0));
-	if (*dyn_sizep != 0)
-		*dyn_sizep = size_sum - static_size - reserved_size;
-
-	return size_sum;
-}
-
 /**
  * pcpu_alloc_alloc_info - allocate percpu allocation info
  * @nr_groups: the number of groups
···
 /**
  * pcpu_build_alloc_info - build alloc_info considering distances between CPUs
  * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @dyn_size: minimum free size for dynamic allocation in bytes
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
  *
···
  * On success, pointer to the new allocation_info is returned.  On
  * failure, ERR_PTR value is returned.
  */
-struct pcpu_alloc_info * __init pcpu_build_alloc_info(
-				size_t reserved_size, ssize_t dyn_size,
+static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+				size_t reserved_size, size_t dyn_size,
 				size_t atom_size,
 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
 {
···
 	memset(group_map, 0, sizeof(group_map));
 	memset(group_cnt, 0, sizeof(group_cnt));
 
+	/* calculate size_sum and ensure dyn_size is enough for early alloc */
+	size_sum = PFN_ALIGN(static_size + reserved_size +
+			    max_t(size_t, dyn_size, PERCPU_DYNAMIC_EARLY_SIZE));
+	dyn_size = size_sum - static_size - reserved_size;
+
 	/*
 	 * Determine min_unit_size, alloc_size and max_upa such that
 	 * alloc_size is multiple of atom_size and is the smallest
 	 * which can accomodate 4k aligned segments which are equal to
 	 * or larger than min_unit_size.
 	 */
-	size_sum = pcpu_calc_fc_sizes(static_size, reserved_size, &dyn_size);
 	min_unit_size = max_t(size_t, size_sum, PCPU_MIN_UNIT_SIZE);
 
 	alloc_size = roundup(min_unit_size, atom_size);
···
 					 void *base_addr)
 {
 	static char cpus_buf[4096] __initdata;
-	static int smap[2], dmap[2];
+	static int smap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
+	static int dmap[PERCPU_DYNAMIC_EARLY_SLOTS] __initdata;
 	size_t dyn_size = ai->dyn_size;
 	size_t size_sum = ai->static_size + ai->reserved_size + dyn_size;
 	struct pcpu_chunk *schunk, *dchunk = NULL;
···
 	} while (0)
 
 	/* sanity checks */
-	BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC ||
-		     ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC);
 	PCPU_SETUP_BUG_ON(ai->nr_groups <= 0);
 	PCPU_SETUP_BUG_ON(!ai->static_size);
 	PCPU_SETUP_BUG_ON(!base_addr);
 	PCPU_SETUP_BUG_ON(ai->unit_size < size_sum);
 	PCPU_SETUP_BUG_ON(ai->unit_size & ~PAGE_MASK);
 	PCPU_SETUP_BUG_ON(ai->unit_size < PCPU_MIN_UNIT_SIZE);
+	PCPU_SETUP_BUG_ON(ai->dyn_size < PERCPU_DYNAMIC_EARLY_SIZE);
 	PCPU_SETUP_BUG_ON(pcpu_verify_alloc_info(ai) < 0);
 
 	/* process group information and build config tables accordingly */
···
 /**
  * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem
  * @reserved_size: the size of reserved percpu area in bytes
- * @dyn_size: free size for dynamic allocation in bytes, -1 for auto
+ * @dyn_size: minimum free size for dynamic allocation in bytes
  * @atom_size: allocation atom size
  * @cpu_distance_fn: callback to determine distance between cpus, optional
  * @alloc_fn: function to allocate percpu page
···
  * vmalloc space is not orders of magnitude larger than distances
  * between node memory addresses (ie. 32bit NUMA machines).
  *
- * When @dyn_size is positive, dynamic area might be larger than
- * specified to fill page alignment.  When @dyn_size is auto,
- * @dyn_size is just big enough to fill page alignment after static
- * and reserved areas.
+ * @dyn_size specifies the minimum dynamic area size.
  *
  * If the needed size is smaller than the minimum or specified unit
  * size, the leftover is returned using @free_fn.
···
  * RETURNS:
  * 0 on success, -errno on failure.
  */
-int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
+int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size,
 				  size_t atom_size,
 				  pcpu_fc_cpu_distance_fn_t cpu_distance_fn,
 				  pcpu_fc_alloc_fn_t alloc_fn,
···
 	snprintf(psize_str, sizeof(psize_str), "%luK", PAGE_SIZE >> 10);
 
-	ai = pcpu_build_alloc_info(reserved_size, -1, PAGE_SIZE, NULL);
+	ai = pcpu_build_alloc_info(reserved_size, 0, PAGE_SIZE, NULL);
 	if (IS_ERR(ai))
 		return PTR_ERR(ai);
 	BUG_ON(ai->nr_groups != 1);
···
 		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
 }
 #endif	/* CONFIG_HAVE_SETUP_PER_CPU_AREA */
+
+/*
+ * First and reserved chunks are initialized with temporary allocation
+ * map in initdata so that they can be used before slab is online.
+ * This function is called after slab is brought up and replaces those
+ * with properly allocated maps.
+ */
+void __init percpu_init_late(void)
+{
+	struct pcpu_chunk *target_chunks[] =
+		{ pcpu_first_chunk, pcpu_reserved_chunk, NULL };
+	struct pcpu_chunk *chunk;
+	unsigned long flags;
+	int i;
+
+	for (i = 0; (chunk = target_chunks[i]); i++) {
+		int *map;
+		const size_t size = PERCPU_DYNAMIC_EARLY_SLOTS * sizeof(map[0]);
+
+		BUILD_BUG_ON(size > PAGE_SIZE);
+
+		map = pcpu_mem_alloc(size);
+		BUG_ON(!map);
+
+		spin_lock_irqsave(&pcpu_lock, flags);
+		memcpy(map, chunk->map, size);
+		chunk->map = map;
+		spin_unlock_irqrestore(&pcpu_lock, flags);
+	}
+}
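As a worked example of the new sizing rule in pcpu_build_alloc_info(), the following standalone sketch (plain userspace C, with an assumed 4 KiB page size and local re-implementations of PFN_ALIGN and the max for illustration only) mirrors the computation above: the requested dyn_size is first raised to at least PERCPU_DYNAMIC_EARLY_SIZE, the sum is page-aligned, and the alignment padding is folded back into the dynamic area.

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE                 4096UL
#define PFN_ALIGN(x)              (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))
#define PERCPU_DYNAMIC_EARLY_SIZE (12 << 10)

int main(void)
{
	size_t static_size = 41000;	/* example .data..percpu size */
	size_t reserved_size = 8 << 10;
	size_t dyn_size = 4 << 10;	/* caller asks for only 4K */
	size_t want, size_sum;

	/* mirror of the new calculation in pcpu_build_alloc_info() */
	want = dyn_size > PERCPU_DYNAMIC_EARLY_SIZE ?
			dyn_size : PERCPU_DYNAMIC_EARLY_SIZE;
	size_sum = PFN_ALIGN(static_size + reserved_size + want);
	dyn_size = size_sum - static_size - reserved_size;

	printf("size_sum = %zu, effective dyn_size = %zu\n",
	       size_sum, dyn_size);
	return 0;
}

With these illustrative numbers a request for only 4 KiB of dynamic space still ends up with roughly 16 KiB, which is what guarantees the early-allocation floor that lets slab depend on the percpu allocator.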