Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'tj-percpu' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into core/percpu

+135 -96
+1
arch/arm/kernel/vmlinux.lds.S
··· 64 64 __initramfs_end = .; 65 65 #endif 66 66 . = ALIGN(4096); 67 + __per_cpu_load = .; 67 68 __per_cpu_start = .; 68 69 *(.data.percpu.page_aligned) 69 70 *(.data.percpu)
+2 -10
arch/ia64/kernel/vmlinux.lds.S
··· 213 213 { *(.data.cacheline_aligned) } 214 214 215 215 /* Per-cpu data: */ 216 - percpu : { } :percpu 217 216 . = ALIGN(PERCPU_PAGE_SIZE); 218 - __phys_per_cpu_start = .; 219 - .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) 220 - { 221 - __per_cpu_start = .; 222 - *(.data.percpu.page_aligned) 223 - *(.data.percpu) 224 - *(.data.percpu.shared_aligned) 225 - __per_cpu_end = .; 226 - } 217 + PERCPU_VADDR(PERCPU_ADDR, :percpu) 218 + __phys_per_cpu_start = __per_cpu_load; 227 219 . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits 228 220 * into percpu page size 229 221 */
+1 -8
arch/powerpc/kernel/vmlinux.lds.S
··· 181 181 __initramfs_end = .; 182 182 } 183 183 #endif 184 - . = ALIGN(PAGE_SIZE); 185 - .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { 186 - __per_cpu_start = .; 187 - *(.data.percpu.page_aligned) 188 - *(.data.percpu) 189 - *(.data.percpu.shared_aligned) 190 - __per_cpu_end = .; 191 - } 184 + PERCPU(PAGE_SIZE) 192 185 193 186 . = ALIGN(8); 194 187 .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) {
-8
arch/x86/include/asm/percpu.h
··· 43 43 #else /* ...!ASSEMBLY */ 44 44 45 45 #include <linux/stringify.h> 46 - #include <asm/sections.h> 47 - 48 - #define __addr_to_pcpu_ptr(addr) \ 49 - (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ 50 - + (unsigned long)__per_cpu_start) 51 - #define __pcpu_ptr_to_addr(ptr) \ 52 - (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ 53 - - (unsigned long)__per_cpu_start) 54 46 55 47 #ifdef CONFIG_SMP 56 48 #define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
+10 -53
arch/x86/kernel/setup_percpu.c
··· 233 233 "%zu bytes\n", vm.addr, static_size); 234 234 235 235 ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, 236 - PERCPU_FIRST_CHUNK_RESERVE, 237 - PMD_SIZE, dyn_size, vm.addr, NULL); 236 + PERCPU_FIRST_CHUNK_RESERVE, dyn_size, 237 + PMD_SIZE, vm.addr, NULL); 238 238 goto out_free_ar; 239 239 240 240 enomem: ··· 257 257 * Embedding allocator 258 258 * 259 259 * The first chunk is sized to just contain the static area plus 260 - * module and dynamic reserves, and allocated as a contiguous area 261 - * using bootmem allocator and used as-is without being mapped into 262 - * vmalloc area. This enables the first chunk to piggy back on the 263 - * linear physical PMD mapping and doesn't add any additional pressure 264 - * to TLB. Note that if the needed size is smaller than the minimum 265 - * unit size, the leftover is returned to the bootmem allocator. 260 + * module and dynamic reserves and embedded into linear physical 261 + * mapping so that it can use PMD mapping without additional TLB 262 + * pressure. 266 263 */ 267 - static void *pcpue_ptr __initdata; 268 - static size_t pcpue_size __initdata; 269 - static size_t pcpue_unit_size __initdata; 270 - 271 - static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) 272 - { 273 - size_t off = (size_t)pageno << PAGE_SHIFT; 274 - 275 - if (off >= pcpue_size) 276 - return NULL; 277 - 278 - return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); 279 - } 280 - 281 264 static ssize_t __init setup_pcpu_embed(size_t static_size) 282 265 { 283 - unsigned int cpu; 284 - size_t dyn_size; 266 + size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; 285 267 286 268 /* 287 269 * If large page isn't supported, there's no benefit in doing ··· 273 291 if (!cpu_has_pse || pcpu_need_numa()) 274 292 return -EINVAL; 275 293 276 - /* allocate and copy */ 277 - pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + 278 - PERCPU_DYNAMIC_RESERVE); 279 - pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); 280 - dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; 281 - 282 - pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, 283 - PAGE_SIZE); 284 - if (!pcpue_ptr) 285 - return -ENOMEM; 286 - 287 - for_each_possible_cpu(cpu) { 288 - void *ptr = pcpue_ptr + cpu * pcpue_unit_size; 289 - 290 - free_bootmem(__pa(ptr + pcpue_size), 291 - pcpue_unit_size - pcpue_size); 292 - memcpy(ptr, __per_cpu_load, static_size); 293 - } 294 - 295 - /* we're ready, commit */ 296 - pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", 297 - pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); 298 - 299 - return pcpu_setup_first_chunk(pcpue_get_page, static_size, 300 - PERCPU_FIRST_CHUNK_RESERVE, 301 - pcpue_unit_size, dyn_size, 302 - pcpue_ptr, NULL); 294 + return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, 295 + reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); 303 296 } 304 297 305 298 /* ··· 332 375 pcpu4k_nr_static_pages, static_size); 333 376 334 377 ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 335 - PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL, 336 - pcpu4k_populate_pte); 378 + PERCPU_FIRST_CHUNK_RESERVE, -1, 379 + -1, NULL, pcpu4k_populate_pte); 337 380 goto out_free_ar; 338 381 339 382 enomem:
+5 -1
include/linux/percpu.h
··· 107 107 108 108 extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, 109 109 size_t static_size, size_t reserved_size, 110 - ssize_t unit_size, ssize_t dyn_size, 110 + ssize_t dyn_size, ssize_t unit_size, 111 111 void *base_addr, 112 112 pcpu_populate_pte_fn_t populate_pte_fn); 113 + 114 + extern ssize_t __init pcpu_embed_first_chunk( 115 + size_t static_size, size_t reserved_size, 116 + ssize_t dyn_size, ssize_t unit_size); 113 117 114 118 /* 115 119 * Use this to get to a cpu's version of the per-cpu object
+1 -1
mm/allocpercpu.c
··· 120 120 * on it. Larger alignment should only be used for module 121 121 * percpu sections on SMP for which this path isn't used. 122 122 */ 123 - WARN_ON_ONCE(align > __alignof__(unsigned long long)); 123 + WARN_ON_ONCE(align > SMP_CACHE_BYTES); 124 124 125 125 if (unlikely(!pdata)) 126 126 return NULL;
+115 -15
mm/percpu.c
··· 46 46 * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA 47 47 * 48 48 * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate 49 - * regular address to percpu pointer and back 49 + * regular address to percpu pointer and back if they need to be 50 + * different from the default 50 51 * 51 52 * - use pcpu_setup_first_chunk() during percpu area initialization to 52 53 * setup the first chunk containing the kernel static percpu area ··· 68 67 #include <linux/workqueue.h> 69 68 70 69 #include <asm/cacheflush.h> 70 + #include <asm/sections.h> 71 71 #include <asm/tlbflush.h> 72 72 73 73 #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ 74 74 #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ 75 + 76 + /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ 77 + #ifndef __addr_to_pcpu_ptr 78 + #define __addr_to_pcpu_ptr(addr) \ 79 + (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ 80 + + (unsigned long)__per_cpu_start) 81 + #endif 82 + #ifndef __pcpu_ptr_to_addr 83 + #define __pcpu_ptr_to_addr(ptr) \ 84 + (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ 85 + - (unsigned long)__per_cpu_start) 86 + #endif 75 87 76 88 struct pcpu_chunk { 77 89 struct list_head list; /* linked to pcpu_slot lists */ ··· 1027 1013 * @get_page_fn: callback to fetch page pointer 1028 1014 * @static_size: the size of static percpu area in bytes 1029 1015 * @reserved_size: the size of reserved percpu area in bytes 1030 - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto 1031 1016 * @dyn_size: free size for dynamic allocation in bytes, -1 for auto 1017 + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto 1032 1018 * @base_addr: mapped address, NULL for auto 1033 1019 * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary 1034 1020 * ··· 1053 1039 * limited offset range for symbol relocations to guarantee module 1054 1040 * percpu symbols fall inside the relocatable range. 1055 1041 * 1042 + * @dyn_size, if non-negative, determines the number of bytes 1043 + * available for dynamic allocation in the first chunk. Specifying 1044 + * non-negative value makes percpu leave alone the area beyond 1045 + * @static_size + @reserved_size + @dyn_size. 1046 + * 1056 1047 * @unit_size, if non-negative, specifies unit size and must be 1057 1048 * aligned to PAGE_SIZE and equal to or larger than @static_size + 1058 - * @reserved_size + @dyn_size. 1059 - * 1060 - * @dyn_size, if non-negative, limits the number of bytes available 1061 - * for dynamic allocation in the first chunk. Specifying non-negative 1062 - * value make percpu leave alone the area beyond @static_size + 1063 - * @reserved_size + @dyn_size. 1049 + * @reserved_size + if non-negative, @dyn_size. 1064 1050 * 1065 1051 * Non-null @base_addr means that the caller already allocated virtual 1066 1052 * region for the first chunk and mapped it. percpu must not mess ··· 1083 1069 */ 1084 1070 size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, 1085 1071 size_t static_size, size_t reserved_size, 1086 - ssize_t unit_size, ssize_t dyn_size, 1072 + ssize_t dyn_size, ssize_t unit_size, 1087 1073 void *base_addr, 1088 1074 pcpu_populate_pte_fn_t populate_pte_fn) 1089 1075 { 1090 1076 static struct vm_struct first_vm; 1091 1077 static int smap[2], dmap[2]; 1078 + size_t size_sum = static_size + reserved_size + 1079 + (dyn_size >= 0 ? dyn_size : 0); 1092 1080 struct pcpu_chunk *schunk, *dchunk = NULL; 1093 1081 unsigned int cpu; 1094 1082 int nr_pages; ··· 1101 1085 ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); 1102 1086 BUG_ON(!static_size); 1103 1087 if (unit_size >= 0) { 1104 - BUG_ON(unit_size < static_size + reserved_size + 1105 - (dyn_size >= 0 ? dyn_size : 0)); 1088 + BUG_ON(unit_size < size_sum); 1106 1089 BUG_ON(unit_size & ~PAGE_MASK); 1107 - } else { 1108 - BUG_ON(dyn_size >= 0); 1090 + BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); 1091 + } else 1109 1092 BUG_ON(base_addr); 1110 - } 1111 1093 BUG_ON(base_addr && populate_pte_fn); 1112 1094 1113 1095 if (unit_size >= 0) 1114 1096 pcpu_unit_pages = unit_size >> PAGE_SHIFT; 1115 1097 else 1116 1098 pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, 1117 - PFN_UP(static_size + reserved_size)); 1099 + PFN_UP(size_sum)); 1118 1100 1119 1101 pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; 1120 1102 pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; ··· 1237 1223 /* we're done */ 1238 1224 pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); 1239 1225 return pcpu_unit_size; 1226 + } 1227 + 1228 + /* 1229 + * Embedding first chunk setup helper. 1230 + */ 1231 + static void *pcpue_ptr __initdata; 1232 + static size_t pcpue_size __initdata; 1233 + static size_t pcpue_unit_size __initdata; 1234 + 1235 + static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) 1236 + { 1237 + size_t off = (size_t)pageno << PAGE_SHIFT; 1238 + 1239 + if (off >= pcpue_size) 1240 + return NULL; 1241 + 1242 + return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); 1243 + } 1244 + 1245 + /** 1246 + * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem 1247 + * @static_size: the size of static percpu area in bytes 1248 + * @reserved_size: the size of reserved percpu area in bytes 1249 + * @dyn_size: free size for dynamic allocation in bytes, -1 for auto 1250 + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto 1251 + * 1252 + * This is a helper to ease setting up embedded first percpu chunk and 1253 + * can be called where pcpu_setup_first_chunk() is expected. 1254 + * 1255 + * If this function is used to setup the first chunk, it is allocated 1256 + * as a contiguous area using bootmem allocator and used as-is without 1257 + * being mapped into vmalloc area. This enables the first chunk to 1258 + * piggy back on the linear physical mapping which often uses larger 1259 + * page size. 1260 + * 1261 + * When @dyn_size is positive, dynamic area might be larger than 1262 + * specified to fill page alignment. Also, when @dyn_size is auto, 1263 + * @dyn_size does not fill the whole first chunk but only what's 1264 + * necessary for page alignment after static and reserved areas. 1265 + * 1266 + * If the needed size is smaller than the minimum or specified unit 1267 + * size, the leftover is returned to the bootmem allocator. 1268 + * 1269 + * RETURNS: 1270 + * The determined pcpu_unit_size which can be used to initialize 1271 + * percpu access on success, -errno on failure. 1272 + */ 1273 + ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, 1274 + ssize_t dyn_size, ssize_t unit_size) 1275 + { 1276 + unsigned int cpu; 1277 + 1278 + /* determine parameters and allocate */ 1279 + pcpue_size = PFN_ALIGN(static_size + reserved_size + 1280 + (dyn_size >= 0 ? dyn_size : 0)); 1281 + if (dyn_size != 0) 1282 + dyn_size = pcpue_size - static_size - reserved_size; 1283 + 1284 + if (unit_size >= 0) { 1285 + BUG_ON(unit_size < pcpue_size); 1286 + pcpue_unit_size = unit_size; 1287 + } else 1288 + pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); 1289 + 1290 + pcpue_ptr = __alloc_bootmem_nopanic( 1291 + num_possible_cpus() * pcpue_unit_size, 1292 + PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); 1293 + if (!pcpue_ptr) 1294 + return -ENOMEM; 1295 + 1296 + /* return the leftover and copy */ 1297 + for_each_possible_cpu(cpu) { 1298 + void *ptr = pcpue_ptr + cpu * pcpue_unit_size; 1299 + 1300 + free_bootmem(__pa(ptr + pcpue_size), 1301 + pcpue_unit_size - pcpue_size); 1302 + memcpy(ptr, __per_cpu_load, static_size); 1303 + } 1304 + 1305 + /* we're ready, commit */ 1306 + pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", 1307 + pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); 1308 + 1309 + return pcpu_setup_first_chunk(pcpue_get_page, static_size, 1310 + reserved_size, dyn_size, 1311 + pcpue_unit_size, pcpue_ptr, NULL); 1240 1312 }