Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

MIPS: Collect FPU emulator statistics per-CPU.

On SMP systems, the collection of statistics can cause cache line
bouncing in the lines associated with the counters. Also, there are
races when incrementing the counters on multiple CPUs.

To fix both problems, we collect the statistics in per-CPU variables,
and add them up in the debugfs read operation.

As a test, I ran the LTP float_bessel test on a 12-CPU Octeon system.

Without CONFIG_DEBUG_FS: 2602 seconds.
With CONFIG_DEBUG_FS: 2640 seconds.
With non-cpu-local atomic statistics: 14569 seconds.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
Cc: linux-mips@linux-mips.org
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

authored by

David Daney and committed by
Ralf Baechle
b6ee75ed 32028f1f

+80 -50
+17 -7
arch/mips/include/asm/fpu_emulator.h
··· 25 25 26 26 #include <asm/break.h> 27 27 #include <asm/inst.h> 28 + #include <asm/local.h> 29 + 30 + #ifdef CONFIG_DEBUG_FS 28 31 29 32 struct mips_fpu_emulator_stats { 30 - unsigned int emulated; 31 - unsigned int loads; 32 - unsigned int stores; 33 - unsigned int cp1ops; 34 - unsigned int cp1xops; 35 - unsigned int errors; 33 + local_t emulated; 34 + local_t loads; 35 + local_t stores; 36 + local_t cp1ops; 37 + local_t cp1xops; 38 + local_t errors; 36 39 }; 37 40 38 - extern struct mips_fpu_emulator_stats fpuemustats; 41 + DECLARE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats); 42 + 43 + #define MIPS_FPU_EMU_INC_STATS(M) \ 44 + cpu_local_wrap(__local_inc(&__get_cpu_var(fpuemustats).M)) 45 + 46 + #else 47 + #define MIPS_FPU_EMU_INC_STATS(M) do { } while (0) 48 + #endif /* CONFIG_DEBUG_FS */ 39 49 40 50 extern int mips_dsemul(struct pt_regs *regs, mips_instruction ir, 41 51 unsigned long cpc);
+61 -41
arch/mips/math-emu/cp1emu.c
··· 35 35 * better performance by compiling with -msoft-float! 36 36 */ 37 37 #include <linux/sched.h> 38 + #include <linux/module.h> 38 39 #include <linux/debugfs.h> 39 40 40 41 #include <asm/inst.h> ··· 69 68 70 69 /* Further private data for which no space exists in mips_fpu_struct */ 71 70 72 - struct mips_fpu_emulator_stats fpuemustats; 71 + #ifdef CONFIG_DEBUG_FS 72 + DEFINE_PER_CPU(struct mips_fpu_emulator_stats, fpuemustats); 73 + #endif 73 74 74 75 /* Control registers */ 75 76 ··· 212 209 unsigned int cond; 213 210 214 211 if (get_user(ir, (mips_instruction __user *) xcp->cp0_epc)) { 215 - fpuemustats.errors++; 212 + MIPS_FPU_EMU_INC_STATS(errors); 216 213 return SIGBUS; 217 214 } 218 215 ··· 243 240 return SIGILL; 244 241 } 245 242 if (get_user(ir, (mips_instruction __user *) emulpc)) { 246 - fpuemustats.errors++; 243 + MIPS_FPU_EMU_INC_STATS(errors); 247 244 return SIGBUS; 248 245 } 249 246 /* __compute_return_epc() will have updated cp0_epc */ ··· 256 253 } 257 254 258 255 emul: 259 - fpuemustats.emulated++; 256 + MIPS_FPU_EMU_INC_STATS(emulated); 260 257 switch (MIPSInst_OPCODE(ir)) { 261 258 case ldc1_op:{ 262 259 u64 __user *va = (u64 __user *) (xcp->regs[MIPSInst_RS(ir)] + 263 260 MIPSInst_SIMM(ir)); 264 261 u64 val; 265 262 266 - fpuemustats.loads++; 263 + MIPS_FPU_EMU_INC_STATS(loads); 267 264 if (get_user(val, va)) { 268 - fpuemustats.errors++; 265 + MIPS_FPU_EMU_INC_STATS(errors); 269 266 return SIGBUS; 270 267 } 271 268 DITOREG(val, MIPSInst_RT(ir)); ··· 277 274 MIPSInst_SIMM(ir)); 278 275 u64 val; 279 276 280 - fpuemustats.stores++; 277 + MIPS_FPU_EMU_INC_STATS(stores); 281 278 DIFROMREG(val, MIPSInst_RT(ir)); 282 279 if (put_user(val, va)) { 283 - fpuemustats.errors++; 280 + MIPS_FPU_EMU_INC_STATS(errors); 284 281 return SIGBUS; 285 282 } 286 283 break; ··· 291 288 MIPSInst_SIMM(ir)); 292 289 u32 val; 293 290 294 - fpuemustats.loads++; 291 + MIPS_FPU_EMU_INC_STATS(loads); 295 292 if (get_user(val, va)) { 296 - fpuemustats.errors++; 293 + 
MIPS_FPU_EMU_INC_STATS(errors); 297 294 return SIGBUS; 298 295 } 299 296 SITOREG(val, MIPSInst_RT(ir)); ··· 305 302 MIPSInst_SIMM(ir)); 306 303 u32 val; 307 304 308 - fpuemustats.stores++; 305 + MIPS_FPU_EMU_INC_STATS(stores); 309 306 SIFROMREG(val, MIPSInst_RT(ir)); 310 307 if (put_user(val, va)) { 311 - fpuemustats.errors++; 308 + MIPS_FPU_EMU_INC_STATS(errors); 312 309 return SIGBUS; 313 310 } 314 311 break; ··· 432 429 433 430 if (get_user(ir, 434 431 (mips_instruction __user *) xcp->cp0_epc)) { 435 - fpuemustats.errors++; 432 + MIPS_FPU_EMU_INC_STATS(errors); 436 433 return SIGBUS; 437 434 } 438 435 ··· 598 595 { 599 596 unsigned rcsr = 0; /* resulting csr */ 600 597 601 - fpuemustats.cp1xops++; 598 + MIPS_FPU_EMU_INC_STATS(cp1xops); 602 599 603 600 switch (MIPSInst_FMA_FFMT(ir)) { 604 601 case s_fmt:{ /* 0 */ ··· 613 610 va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + 614 611 xcp->regs[MIPSInst_FT(ir)]); 615 612 616 - fpuemustats.loads++; 613 + MIPS_FPU_EMU_INC_STATS(loads); 617 614 if (get_user(val, va)) { 618 - fpuemustats.errors++; 615 + MIPS_FPU_EMU_INC_STATS(errors); 619 616 return SIGBUS; 620 617 } 621 618 SITOREG(val, MIPSInst_FD(ir)); ··· 625 622 va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + 626 623 xcp->regs[MIPSInst_FT(ir)]); 627 624 628 - fpuemustats.stores++; 625 + MIPS_FPU_EMU_INC_STATS(stores); 629 626 630 627 SIFROMREG(val, MIPSInst_FS(ir)); 631 628 if (put_user(val, va)) { 632 - fpuemustats.errors++; 629 + MIPS_FPU_EMU_INC_STATS(errors); 633 630 return SIGBUS; 634 631 } 635 632 break; ··· 690 687 va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + 691 688 xcp->regs[MIPSInst_FT(ir)]); 692 689 693 - fpuemustats.loads++; 690 + MIPS_FPU_EMU_INC_STATS(loads); 694 691 if (get_user(val, va)) { 695 - fpuemustats.errors++; 692 + MIPS_FPU_EMU_INC_STATS(errors); 696 693 return SIGBUS; 697 694 } 698 695 DITOREG(val, MIPSInst_FD(ir)); ··· 702 699 va = (void __user *) (xcp->regs[MIPSInst_FR(ir)] + 703 700 xcp->regs[MIPSInst_FT(ir)]); 704 701 705 - 
fpuemustats.stores++; 702 + MIPS_FPU_EMU_INC_STATS(stores); 706 703 DIFROMREG(val, MIPSInst_FS(ir)); 707 704 if (put_user(val, va)) { 708 - fpuemustats.errors++; 705 + MIPS_FPU_EMU_INC_STATS(errors); 709 706 return SIGBUS; 710 707 } 711 708 break; ··· 772 769 #endif 773 770 } rv; /* resulting value */ 774 771 775 - fpuemustats.cp1ops++; 772 + MIPS_FPU_EMU_INC_STATS(cp1ops); 776 773 switch (rfmt = (MIPSInst_FFMT(ir) & 0xf)) { 777 774 case s_fmt:{ /* 0 */ 778 775 union { ··· 1243 1240 prevepc = xcp->cp0_epc; 1244 1241 1245 1242 if (get_user(insn, (mips_instruction __user *) xcp->cp0_epc)) { 1246 - fpuemustats.errors++; 1243 + MIPS_FPU_EMU_INC_STATS(errors); 1247 1244 return SIGBUS; 1248 1245 } 1249 1246 if (insn == 0) ··· 1279 1276 } 1280 1277 1281 1278 #ifdef CONFIG_DEBUG_FS 1279 + 1280 + static int fpuemu_stat_get(void *data, u64 *val) 1281 + { 1282 + int cpu; 1283 + unsigned long sum = 0; 1284 + for_each_online_cpu(cpu) { 1285 + struct mips_fpu_emulator_stats *ps; 1286 + local_t *pv; 1287 + ps = &per_cpu(fpuemustats, cpu); 1288 + pv = (void *)ps + (unsigned long)data; 1289 + sum += local_read(pv); 1290 + } 1291 + *val = sum; 1292 + return 0; 1293 + } 1294 + DEFINE_SIMPLE_ATTRIBUTE(fops_fpuemu_stat, fpuemu_stat_get, NULL, "%llu\n"); 1295 + 1282 1296 extern struct dentry *mips_debugfs_dir; 1283 1297 static int __init debugfs_fpuemu(void) 1284 1298 { 1285 1299 struct dentry *d, *dir; 1286 - int i; 1287 - static struct { 1288 - const char *name; 1289 - unsigned int *v; 1290 - } vars[] __initdata = { 1291 - { "emulated", &fpuemustats.emulated }, 1292 - { "loads", &fpuemustats.loads }, 1293 - { "stores", &fpuemustats.stores }, 1294 - { "cp1ops", &fpuemustats.cp1ops }, 1295 - { "cp1xops", &fpuemustats.cp1xops }, 1296 - { "errors", &fpuemustats.errors }, 1297 - }; 1298 1300 1299 1301 if (!mips_debugfs_dir) 1300 1302 return -ENODEV; 1301 1303 dir = debugfs_create_dir("fpuemustats", mips_debugfs_dir); 1302 1304 if (!dir) 1303 1305 return -ENOMEM; 1304 - for (i = 0; i < 
ARRAY_SIZE(vars); i++) { 1305 - d = debugfs_create_u32(vars[i].name, S_IRUGO, dir, vars[i].v); 1306 - if (!d) 1307 - return -ENOMEM; 1308 - } 1306 + 1307 + #define FPU_STAT_CREATE(M) \ 1308 + do { \ 1309 + d = debugfs_create_file(#M , S_IRUGO, dir, \ 1310 + (void *)offsetof(struct mips_fpu_emulator_stats, M), \ 1311 + &fops_fpuemu_stat); \ 1312 + if (!d) \ 1313 + return -ENOMEM; \ 1314 + } while (0) 1315 + 1316 + FPU_STAT_CREATE(emulated); 1317 + FPU_STAT_CREATE(loads); 1318 + FPU_STAT_CREATE(stores); 1319 + FPU_STAT_CREATE(cp1ops); 1320 + FPU_STAT_CREATE(cp1xops); 1321 + FPU_STAT_CREATE(errors); 1322 + 1309 1323 return 0; 1310 1324 } 1311 1325 __initcall(debugfs_fpuemu);
+2 -2
arch/mips/math-emu/dsemul.c
··· 98 98 err |= __put_user(cpc, &fr->epc); 99 99 100 100 if (unlikely(err)) { 101 - fpuemustats.errors++; 101 + MIPS_FPU_EMU_INC_STATS(errors); 102 102 return SIGBUS; 103 103 } 104 104 ··· 136 136 err |= __get_user(cookie, &fr->cookie); 137 137 138 138 if (unlikely(err || (insn != BREAK_MATH) || (cookie != BD_COOKIE))) { 139 - fpuemustats.errors++; 139 + MIPS_FPU_EMU_INC_STATS(errors); 140 140 return 0; 141 141 } 142 142