Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'for-linus' into for-next

Conflicts:
mm/percpu.c

Tejun Heo 50de1a8e 390dfd95

+132 -60
+1 -1
drivers/base/cpu.c
··· 97 97 * boot up and this data does not change there after. Hence this 98 98 * operation should be safe. No locking required. 99 99 */ 100 - addr = __pa(per_cpu_ptr(crash_notes, cpunum)); 100 + addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpunum)); 101 101 rc = sprintf(buf, "%Lx\n", addr); 102 102 return rc; 103 103 }
+6
include/linux/percpu.h
··· 130 130 extern void *__alloc_reserved_percpu(size_t size, size_t align); 131 131 extern void *__alloc_percpu(size_t size, size_t align); 132 132 extern void free_percpu(void *__pdata); 133 + extern phys_addr_t per_cpu_ptr_to_phys(void *addr); 133 134 134 135 #ifndef CONFIG_HAVE_SETUP_PER_CPU_AREA 135 136 extern void __init setup_per_cpu_areas(void); ··· 154 153 static inline void free_percpu(void *p) 155 154 { 156 155 kfree(p); 156 + } 157 + 158 + static inline phys_addr_t per_cpu_ptr_to_phys(void *addr) 159 + { 160 + return __pa(addr); 157 161 } 158 162 159 163 static inline void __init setup_per_cpu_areas(void) { }
+11 -11
kernel/sched.c
··· 1563 1563 1564 1564 #ifdef CONFIG_FAIR_GROUP_SCHED 1565 1565 1566 - struct update_shares_data { 1567 - unsigned long rq_weight[NR_CPUS]; 1568 - }; 1569 - 1570 - static DEFINE_PER_CPU(struct update_shares_data, update_shares_data); 1566 + static __read_mostly unsigned long *update_shares_data; 1571 1567 1572 1568 static void __set_se_shares(struct sched_entity *se, unsigned long shares); 1573 1569 ··· 1573 1577 static void update_group_shares_cpu(struct task_group *tg, int cpu, 1574 1578 unsigned long sd_shares, 1575 1579 unsigned long sd_rq_weight, 1576 - struct update_shares_data *usd) 1580 + unsigned long *usd_rq_weight) 1577 1581 { 1578 1582 unsigned long shares, rq_weight; 1579 1583 int boost = 0; 1580 1584 1581 - rq_weight = usd->rq_weight[cpu]; 1585 + rq_weight = usd_rq_weight[cpu]; 1582 1586 if (!rq_weight) { 1583 1587 boost = 1; 1584 1588 rq_weight = NICE_0_LOAD; ··· 1613 1617 static int tg_shares_up(struct task_group *tg, void *data) 1614 1618 { 1615 1619 unsigned long weight, rq_weight = 0, shares = 0; 1616 - struct update_shares_data *usd; 1620 + unsigned long *usd_rq_weight; 1617 1621 struct sched_domain *sd = data; 1618 1622 unsigned long flags; 1619 1623 int i; ··· 1622 1626 return 0; 1623 1627 1624 1628 local_irq_save(flags); 1625 - usd = &__get_cpu_var(update_shares_data); 1629 + usd_rq_weight = per_cpu_ptr(update_shares_data, smp_processor_id()); 1626 1630 1627 1631 for_each_cpu(i, sched_domain_span(sd)) { 1628 1632 weight = tg->cfs_rq[i]->load.weight; 1629 - usd->rq_weight[i] = weight; 1633 + usd_rq_weight[i] = weight; 1630 1634 1631 1635 /* 1632 1636 * If there are currently no tasks on the cpu pretend there ··· 1647 1651 shares = tg->shares; 1648 1652 1649 1653 for_each_cpu(i, sched_domain_span(sd)) 1650 - update_group_shares_cpu(tg, i, shares, rq_weight, usd); 1654 + update_group_shares_cpu(tg, i, shares, rq_weight, usd_rq_weight); 1651 1655 1652 1656 local_irq_restore(flags); 1653 1657 ··· 9402 9406 #endif /* CONFIG_USER_SCHED */ 9403 9407 #endif /* CONFIG_GROUP_SCHED */ 9404 9408 9409 + #if defined CONFIG_FAIR_GROUP_SCHED && defined CONFIG_SMP 9410 + update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long), 9411 + __alignof__(unsigned long)); 9412 + #endif 9405 9413 for_each_possible_cpu(i) { 9406 9414 struct rq *rq; 9407 9415
+114 -48
mm/percpu.c
··· 72 72 #include <asm/cacheflush.h> 73 73 #include <asm/sections.h> 74 74 #include <asm/tlbflush.h> 75 + #include <asm/io.h> 75 76 76 77 #define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ 77 78 #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ ··· 152 151 * 153 152 * During allocation, pcpu_alloc_mutex is kept locked all the time and 154 153 * pcpu_lock is grabbed and released as necessary. All actual memory 155 - * allocations are done using GFP_KERNEL with pcpu_lock released. 154 + * allocations are done using GFP_KERNEL with pcpu_lock released. In 155 + * general, percpu memory can't be allocated with irq off but 156 + * irqsave/restore are still used in alloc path so that it can be used 157 + * from early init path - sched_init() specifically. 156 158 * 157 159 * Free path accesses and alters only the index data structures, so it 158 160 * can be safely called from atomic context. When memory needs to be ··· 354 350 } 355 351 356 352 /** 357 - * pcpu_extend_area_map - extend area map for allocation 358 - * @chunk: target chunk 353 + * pcpu_need_to_extend - determine whether chunk area map needs to be extended 354 + * @chunk: chunk of interest 359 355 * 360 - * Extend area map of @chunk so that it can accomodate an allocation. 361 - * A single allocation can split an area into three areas, so this 362 - * function makes sure that @chunk->map has at least two extra slots. 356 + * Determine whether area map of @chunk needs to be extended to 357 + * accomodate a new allocation. 363 358 * 364 359 * CONTEXT: 365 - * pcpu_alloc_mutex, pcpu_lock. pcpu_lock is released and reacquired 366 - * if area map is extended. 360 + * pcpu_lock. 367 361 * 368 362 * RETURNS: 369 - * 0 if noop, 1 if successfully extended, -errno on failure. 363 + * New target map allocation length if extension is necessary, 0 364 + * otherwise. 370 365 */ 371 - static int pcpu_extend_area_map(struct pcpu_chunk *chunk) 372 - __releases(lock) __acquires(lock) 366 + static int pcpu_need_to_extend(struct pcpu_chunk *chunk) 373 367 { 374 368 int new_alloc; 375 - int *new; 376 - size_t size; 377 369 378 - /* has enough? */ 379 370 if (chunk->map_alloc >= chunk->map_used + 2) 380 371 return 0; 381 - 382 - spin_unlock_irq(&pcpu_lock); 383 372 384 373 new_alloc = PCPU_DFL_MAP_ALLOC; 385 374 while (new_alloc < chunk->map_used + 2) 386 375 new_alloc *= 2; 387 376 388 - new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); 389 - if (!new) { 390 - spin_lock_irq(&pcpu_lock); 377 + return new_alloc; 378 + } 379 + 380 + /** 381 + * pcpu_extend_area_map - extend area map of a chunk 382 + * @chunk: chunk of interest 383 + * @new_alloc: new target allocation length of the area map 384 + * 385 + * Extend area map of @chunk to have @new_alloc entries. 386 + * 387 + * CONTEXT: 388 + * Does GFP_KERNEL allocation. Grabs and releases pcpu_lock. 389 + * 390 + * RETURNS: 391 + * 0 on success, -errno on failure. 392 + */ 393 + static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc) 394 + { 395 + int *old = NULL, *new = NULL; 396 + size_t old_size = 0, new_size = new_alloc * sizeof(new[0]); 397 + unsigned long flags; 398 + 399 + new = pcpu_mem_alloc(new_size); 400 + if (!new) 391 401 return -ENOMEM; 392 - } 393 402 394 - /* 395 - * Acquire pcpu_lock and switch to new area map. Only free 396 - * could have happened inbetween, so map_used couldn't have 397 - * grown. 398 - */ 399 - spin_lock_irq(&pcpu_lock); 400 - BUG_ON(new_alloc < chunk->map_used + 2); 403 + /* acquire pcpu_lock and switch to new area map */ 404 + spin_lock_irqsave(&pcpu_lock, flags); 401 405 402 - size = chunk->map_alloc * sizeof(chunk->map[0]); 403 - memcpy(new, chunk->map, size); 406 + if (new_alloc <= chunk->map_alloc) 407 + goto out_unlock; 408 + 409 + old_size = chunk->map_alloc * sizeof(chunk->map[0]); 410 + memcpy(new, chunk->map, old_size); 404 411 405 412 /* 406 413 * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is 407 414 * one of the first chunks and still using static map. 408 415 */ 409 416 if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) 410 - pcpu_mem_free(chunk->map, size); 417 + old = chunk->map; 411 418 412 419 chunk->map_alloc = new_alloc; 413 420 chunk->map = new; 421 + new = NULL; 422 + 423 + out_unlock: 424 + spin_unlock_irqrestore(&pcpu_lock, flags); 425 + 426 + /* 427 + * pcpu_mem_free() might end up calling vfree() which uses 428 + * IRQ-unsafe lock and thus can't be called under pcpu_lock. 429 + */ 430 + pcpu_mem_free(old, old_size); 431 + pcpu_mem_free(new, new_size); 432 + 414 433 return 0; 415 434 } 416 435 ··· 1072 1045 static int warn_limit = 10; 1073 1046 struct pcpu_chunk *chunk; 1074 1047 const char *err; 1075 - int slot, off; 1048 + int slot, off, new_alloc; 1049 + unsigned long flags; 1076 1050 1077 1051 if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { 1078 1052 WARN(true, "illegal size (%zu) or align (%zu) for " ··· 1082 1054 } 1083 1055 1084 1056 mutex_lock(&pcpu_alloc_mutex); 1085 - spin_lock_irq(&pcpu_lock); 1057 + spin_lock_irqsave(&pcpu_lock, flags); 1086 1058 1087 1059 /* serve reserved allocations from the reserved chunk if available */ 1088 1060 if (reserved && pcpu_reserved_chunk) { 1089 1061 chunk = pcpu_reserved_chunk; 1090 - if (size > chunk->contig_hint || 1091 - pcpu_extend_area_map(chunk) < 0) { 1092 - err = "failed to extend area map of reserved chunk"; 1062 + 1063 + if (size > chunk->contig_hint) { 1064 + err = "alloc from reserved chunk failed"; 1093 1065 goto fail_unlock; 1094 1066 } 1067 + 1068 + while ((new_alloc = pcpu_need_to_extend(chunk))) { 1069 + spin_unlock_irqrestore(&pcpu_lock, flags); 1070 + if (pcpu_extend_area_map(chunk, new_alloc) < 0) { 1071 + err = "failed to extend area map of reserved chunk"; 1072 + goto fail_unlock_mutex; 1073 + } 1074 + spin_lock_irqsave(&pcpu_lock, flags); 1075 + } 1076 + 1095 1077 off = pcpu_alloc_area(chunk, size, align); 1096 1078 if (off >= 0) 1097 1079 goto area_found; 1080 + 1098 1081 err = "alloc from reserved chunk failed"; 1099 1082 goto fail_unlock; 1100 1083 } ··· 1117 1078 if (size > chunk->contig_hint) 1118 1079 continue; 1119 1080 1120 - switch (pcpu_extend_area_map(chunk)) { 1121 - case 0: 1122 - break; 1123 - case 1: 1124 - goto restart; /* pcpu_lock dropped, restart */ 1125 - default: 1126 - err = "failed to extend area map"; 1127 - goto fail_unlock; 1081 + new_alloc = pcpu_need_to_extend(chunk); 1082 + if (new_alloc) { 1083 + spin_unlock_irqrestore(&pcpu_lock, flags); 1084 + if (pcpu_extend_area_map(chunk, 1085 + new_alloc) < 0) { 1086 + err = "failed to extend area map"; 1087 + goto fail_unlock_mutex; 1088 + } 1089 + spin_lock_irqsave(&pcpu_lock, flags); 1090 + /* 1091 + * pcpu_lock has been dropped, need to 1092 + * restart cpu_slot list walking. 1093 + */ 1094 + goto restart; 1128 1095 } 1129 1096 1130 1097 off = pcpu_alloc_area(chunk, size, align); ··· 1140 1095 } 1141 1096 1142 1097 /* hmmm... no space left, create a new chunk */ 1143 - spin_unlock_irq(&pcpu_lock); 1098 + spin_unlock_irqrestore(&pcpu_lock, flags); 1144 1099 1145 1100 chunk = alloc_pcpu_chunk(); 1146 1101 if (!chunk) { ··· 1148 1103 goto fail_unlock_mutex; 1149 1104 } 1150 1105 1151 - spin_lock_irq(&pcpu_lock); 1106 + spin_lock_irqsave(&pcpu_lock, flags); 1152 1107 pcpu_chunk_relocate(chunk, -1); 1153 1108 goto restart; 1154 1109 1155 1110 area_found: 1156 - spin_unlock_irq(&pcpu_lock); 1111 + spin_unlock_irqrestore(&pcpu_lock, flags); 1157 1112 1158 1113 /* populate, map and clear the area */ 1159 1114 if (pcpu_populate_chunk(chunk, off, size)) { 1160 - spin_lock_irq(&pcpu_lock); 1115 + spin_lock_irqsave(&pcpu_lock, flags); 1161 1116 pcpu_free_area(chunk, off); 1162 1117 err = "failed to populate"; 1163 1118 goto fail_unlock; ··· 1169 1124 return __addr_to_pcpu_ptr(chunk->base_addr + off); 1170 1125 1171 1126 fail_unlock: 1172 - spin_unlock_irq(&pcpu_lock); 1127 + spin_unlock_irqrestore(&pcpu_lock, flags); 1173 1128 fail_unlock_mutex: 1174 1129 mutex_unlock(&pcpu_alloc_mutex); 1175 1130 if (warn_limit) { ··· 1300 1255 spin_unlock_irqrestore(&pcpu_lock, flags); 1301 1256 } 1302 1257 EXPORT_SYMBOL_GPL(free_percpu); 1258 + 1259 + /** 1260 + * per_cpu_ptr_to_phys - convert translated percpu address to physical address 1261 + * @addr: the address to be converted to physical address 1262 + * 1263 + * Given @addr which is dereferenceable address obtained via one of 1264 + * percpu access macros, this function translates it into its physical 1265 + * address. The caller is responsible for ensuring @addr stays valid 1266 + * until this function finishes. 1267 + * 1268 + * RETURNS: 1269 + * The physical address for @addr. 1270 + */ 1271 + phys_addr_t per_cpu_ptr_to_phys(void *addr) 1272 + { 1273 + if ((unsigned long)addr < VMALLOC_START || 1274 + (unsigned long)addr >= VMALLOC_END) 1275 + return __pa(addr); 1276 + else 1277 + return page_to_phys(vmalloc_to_page(addr)); 1278 + } 1303 1279 1304 1280 static inline size_t pcpu_calc_fc_sizes(size_t static_size, 1305 1281 size_t reserved_size,