Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sparc64: Store per-cpu offset in trap_block[]

Surprisingly this actually makes LOAD_PER_CPU_BASE() a little
more efficient.

Signed-off-by: David S. Miller <davem@davemloft.net>

+21 -44
+3 -3
arch/sparc/include/asm/percpu_64.h
@@ -7,12 +7,12 @@
 
 #ifdef CONFIG_SMP
 
+#include <asm/trap_block.h>
+
 extern void real_setup_per_cpu_areas(void);
 
-extern unsigned long __per_cpu_base;
-extern unsigned long __per_cpu_shift;
 #define __per_cpu_offset(__cpu) \
-	(__per_cpu_base + ((unsigned long)(__cpu) << __per_cpu_shift))
+	(trap_block[(__cpu)].__per_cpu_base)
 #define per_cpu_offset(x) (__per_cpu_offset(x))
 
 #define __my_cpu_offset __local_per_cpu_offset
+7 -7
arch/sparc/include/asm/trap_block.h
@@ -48,7 +48,7 @@
 	unsigned int		dev_mondo_qmask;
 	unsigned int		resum_qmask;
 	unsigned int		nonresum_qmask;
-	unsigned long		__unused;
+	unsigned long		__per_cpu_base;
 } __attribute__((aligned(64)));
 extern struct trap_per_cpu trap_block[NR_CPUS];
 extern void init_cur_cpu_trap(struct thread_info *);
@@ -101,6 +101,7 @@
 #define TRAP_PER_CPU_DEV_MONDO_QMASK	0xec
 #define TRAP_PER_CPU_RESUM_QMASK	0xf0
 #define TRAP_PER_CPU_NONRESUM_QMASK	0xf4
+#define TRAP_PER_CPU_PER_CPU_BASE	0xf8
 
 #define TRAP_BLOCK_SZ_SHIFT		8
@@ -172,11 +173,10 @@
  */
 #define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3)	\
 	lduh	[THR + TI_CPU], REG1;			\
-	sethi	%hi(__per_cpu_shift), REG3;		\
-	sethi	%hi(__per_cpu_base), REG2;		\
-	ldx	[REG3 + %lo(__per_cpu_shift)], REG3;	\
-	ldx	[REG2 + %lo(__per_cpu_base)], REG2;	\
-	sllx	REG1, REG3, REG3;			\
-	add	REG3, REG2, DEST;
+	sethi	%hi(trap_block), REG2;			\
+	sllx	REG1, TRAP_BLOCK_SZ_SHIFT, REG1;	\
+	or	REG2, %lo(trap_block), REG2;		\
+	add	REG2, REG1, REG2;			\
+	ldx	[REG2 + TRAP_PER_CPU_PER_CPU_BASE], DEST;
 
 #else
-22
arch/sparc/kernel/head_64.S
@@ -641,28 +641,6 @@
 		/* Not reached... */
 
 1:
-	/* If we boot on a non-zero cpu, all of the per-cpu
-	 * variable references we make before setting up the
-	 * per-cpu areas will use a bogus offset.  Put a
-	 * compensating factor into __per_cpu_base to handle
-	 * this cleanly.
-	 *
-	 * What the per-cpu code calculates is:
-	 *
-	 *	__per_cpu_base + (cpu << __per_cpu_shift)
-	 *
-	 * These two variables are zero initially, so to
-	 * make it all cancel out to zero we need to put
-	 * "0 - (cpu << 0)" into __per_cpu_base so that the
-	 * above formula evaluates to zero.
-	 *
-	 * We cannot even perform a printk() until this stuff
-	 * is setup as that calls cpu_clock() which uses
-	 * per-cpu variables.
-	 */
-	sub	%g0, %o0, %o1
-	sethi	%hi(__per_cpu_base), %o2
-	stx	%o1, [%o2 + %lo(__per_cpu_base)]
 #else
 	mov	0, %o0
 #endif
+7 -11
arch/sparc/kernel/smp_64.c
@@ -1371,23 +1371,17 @@
 {
 }
 
-unsigned long __per_cpu_base __read_mostly;
-unsigned long __per_cpu_shift __read_mostly;
-
-EXPORT_SYMBOL(__per_cpu_base);
-EXPORT_SYMBOL(__per_cpu_shift);
-
 void __init real_setup_per_cpu_areas(void)
 {
-	unsigned long paddr, goal, size, i;
+	unsigned long base, shift, paddr, goal, size, i;
 	char *ptr;
 
 	/* Copy section for each CPU (we discard the original) */
 	goal = PERCPU_ENOUGH_ROOM;
 
-	__per_cpu_shift = PAGE_SHIFT;
+	shift = PAGE_SHIFT;
 	for (size = PAGE_SIZE; size < goal; size <<= 1UL)
-		__per_cpu_shift++;
+		shift++;
 
 	paddr = lmb_alloc(size * NR_CPUS, PAGE_SIZE);
 	if (!paddr) {
@@ -1396,10 +1390,12 @@
 	}
 
 	ptr = __va(paddr);
-	__per_cpu_base = ptr - __per_cpu_start;
+	base = ptr - __per_cpu_start;
 
-	for (i = 0; i < NR_CPUS; i++, ptr += size)
+	for (i = 0; i < NR_CPUS; i++, ptr += size) {
+		__per_cpu_offset(i) = base + (i * size);
 		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
+	}
 
 	/* Setup %g5 for the boot cpu. */
 	__local_per_cpu_offset = __per_cpu_offset(smp_processor_id());
+4 -1
arch/sparc/kernel/traps_64.c
@@ -2509,6 +2509,7 @@
 }
 
 struct trap_per_cpu trap_block[NR_CPUS];
+EXPORT_SYMBOL(trap_block);
 
 /* This can get invoked before sched_init() so play it super safe
  * and use hard_smp_processor_id().
@@ -2592,7 +2593,9 @@
 	    (TRAP_PER_CPU_RESUM_QMASK !=
 	     offsetof(struct trap_per_cpu, resum_qmask)) ||
 	    (TRAP_PER_CPU_NONRESUM_QMASK !=
-	     offsetof(struct trap_per_cpu, nonresum_qmask)))
+	     offsetof(struct trap_per_cpu, nonresum_qmask)) ||
+	    (TRAP_PER_CPU_PER_CPU_BASE !=
+	     offsetof(struct trap_per_cpu, __per_cpu_base)))
 		trap_per_cpu_offsets_are_bolixed_dave();
 
 	if ((TSB_CONFIG_TSB !=