Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: PPC: Book3S HV: Accumulate timing information for real-mode code

This reads the timebase at various points in the real-mode guest
entry/exit code and uses that to accumulate total, minimum and
maximum time spent in those parts of the code. Currently these
times are accumulated per vcpu in 5 parts of the code:

* rm_entry - time taken from the start of kvmppc_hv_entry() until
just before entering the guest.
* rm_intr - time from when we take a hypervisor interrupt in the
guest until we either re-enter the guest or decide to exit to the
host. This includes time spent handling hcalls in real mode.
* rm_exit - time from when we decide to exit the guest until the
return from kvmppc_hv_entry().
* guest - time spent in the guest
* cede - time spent napping in real mode due to an H_CEDE hcall
while other threads in the same vcore are active.

These times are exposed in debugfs in a directory per vcpu that
contains a file called "timings". This file contains one line for
each of the 5 timings above, with the name followed by a colon and
4 numbers, which are the count (number of times the code has been
executed), the total time, the minimum time, and the maximum time,
all in nanoseconds.

The overhead of the extra code amounts to about 30ns for an hcall that
is handled in real mode (e.g. H_SET_DABR), which is about 25%. Since
production environments may not wish to incur this overhead, the new
code is conditional on a new config symbol,
CONFIG_KVM_BOOK3S_HV_EXIT_TIMING.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>

authored by

Paul Mackerras and committed by
Alexander Graf
b6c295df e23a808b

+346 -2
+21
arch/powerpc/include/asm/kvm_host.h
··· 369 369 u8 base_page_size; /* MMU_PAGE_xxx */ 370 370 }; 371 371 372 + /* Struct used to accumulate timing information in HV real mode code */ 373 + struct kvmhv_tb_accumulator { 374 + u64 seqcount; /* used to synchronize access, also count * 2 */ 375 + u64 tb_total; /* total time in timebase ticks */ 376 + u64 tb_min; /* min time */ 377 + u64 tb_max; /* max time */ 378 + }; 379 + 372 380 # ifdef CONFIG_PPC_FSL_BOOK3E 373 381 #define KVMPPC_BOOKE_IAC_NUM 2 374 382 #define KVMPPC_BOOKE_DAC_NUM 2 ··· 665 657 666 658 u32 emul_inst; 667 659 #endif 660 + 661 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 662 + struct kvmhv_tb_accumulator *cur_activity; /* What we're timing */ 663 + u64 cur_tb_start; /* when it started */ 664 + struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */ 665 + struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */ 666 + struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */ 667 + struct kvmhv_tb_accumulator guest_time; /* guest execution */ 668 + struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ 669 + 670 + struct dentry *debugfs_dir; 671 + struct dentry *debugfs_timings; 672 + #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ 668 673 }; 669 674 670 675 #define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET]
+3
arch/powerpc/include/asm/time.h
··· 211 211 212 212 DECLARE_PER_CPU(u64, decrementers_next_tb); 213 213 214 + /* Convert timebase ticks to nanoseconds */ 215 + unsigned long long tb_to_ns(unsigned long long tb_ticks); 216 + 214 217 #endif /* __KERNEL__ */ 215 218 #endif /* __POWERPC_TIME_H */
+13
arch/powerpc/kernel/asm-offsets.c
··· 459 459 DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2)); 460 460 DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3)); 461 461 #endif 462 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 463 + DEFINE(VCPU_TB_RMENTRY, offsetof(struct kvm_vcpu, arch.rm_entry)); 464 + DEFINE(VCPU_TB_RMINTR, offsetof(struct kvm_vcpu, arch.rm_intr)); 465 + DEFINE(VCPU_TB_RMEXIT, offsetof(struct kvm_vcpu, arch.rm_exit)); 466 + DEFINE(VCPU_TB_GUEST, offsetof(struct kvm_vcpu, arch.guest_time)); 467 + DEFINE(VCPU_TB_CEDE, offsetof(struct kvm_vcpu, arch.cede_time)); 468 + DEFINE(VCPU_CUR_ACTIVITY, offsetof(struct kvm_vcpu, arch.cur_activity)); 469 + DEFINE(VCPU_ACTIVITY_START, offsetof(struct kvm_vcpu, arch.cur_tb_start)); 470 + DEFINE(TAS_SEQCOUNT, offsetof(struct kvmhv_tb_accumulator, seqcount)); 471 + DEFINE(TAS_TOTAL, offsetof(struct kvmhv_tb_accumulator, tb_total)); 472 + DEFINE(TAS_MIN, offsetof(struct kvmhv_tb_accumulator, tb_min)); 473 + DEFINE(TAS_MAX, offsetof(struct kvmhv_tb_accumulator, tb_max)); 474 + #endif 462 475 DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3)); 463 476 DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4)); 464 477 DEFINE(VCPU_SHARED_SPRG5, offsetof(struct kvm_vcpu_arch_shared, sprg5));
+6
arch/powerpc/kernel/time.c
··· 608 608 } 609 609 #endif 610 610 611 + unsigned long long tb_to_ns(unsigned long long ticks) 612 + { 613 + return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift; 614 + } 615 + EXPORT_SYMBOL_GPL(tb_to_ns); 616 + 611 617 /* 612 618 * Scheduler clock - returns current time in nanosec units. 613 619 *
+14
arch/powerpc/kvm/Kconfig
··· 110 110 processor, including emulating 32-bit processors on a 64-bit 111 111 host. 112 112 113 + config KVM_BOOK3S_HV_EXIT_TIMING 114 + bool "Detailed timing for hypervisor real-mode code" 115 + depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS 116 + ---help--- 117 + Calculate time taken for each vcpu in the real-mode guest entry, 118 + exit, and interrupt handling code, plus time spent in the guest 119 + and in nap mode due to idle (cede) while other threads are still 120 + in the guest. The total, minimum and maximum times in nanoseconds 121 + together with the number of executions are reported in debugfs in 122 + kvm/vm#/vcpu#/timings. The overhead is of the order of 30 - 40 123 + ns per exit on POWER8. 124 + 125 + If unsure, say N. 126 + 113 127 config KVM_BOOKE_HV 114 128 bool 115 129
+150
arch/powerpc/kvm/book3s_hv.c
··· 1423 1423 return vcore; 1424 1424 } 1425 1425 1426 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1427 + static struct debugfs_timings_element { 1428 + const char *name; 1429 + size_t offset; 1430 + } timings[] = { 1431 + {"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)}, 1432 + {"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)}, 1433 + {"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)}, 1434 + {"guest", offsetof(struct kvm_vcpu, arch.guest_time)}, 1435 + {"cede", offsetof(struct kvm_vcpu, arch.cede_time)}, 1436 + }; 1437 + 1438 + #define N_TIMINGS (sizeof(timings) / sizeof(timings[0])) 1439 + 1440 + struct debugfs_timings_state { 1441 + struct kvm_vcpu *vcpu; 1442 + unsigned int buflen; 1443 + char buf[N_TIMINGS * 100]; 1444 + }; 1445 + 1446 + static int debugfs_timings_open(struct inode *inode, struct file *file) 1447 + { 1448 + struct kvm_vcpu *vcpu = inode->i_private; 1449 + struct debugfs_timings_state *p; 1450 + 1451 + p = kzalloc(sizeof(*p), GFP_KERNEL); 1452 + if (!p) 1453 + return -ENOMEM; 1454 + 1455 + kvm_get_kvm(vcpu->kvm); 1456 + p->vcpu = vcpu; 1457 + file->private_data = p; 1458 + 1459 + return nonseekable_open(inode, file); 1460 + } 1461 + 1462 + static int debugfs_timings_release(struct inode *inode, struct file *file) 1463 + { 1464 + struct debugfs_timings_state *p = file->private_data; 1465 + 1466 + kvm_put_kvm(p->vcpu->kvm); 1467 + kfree(p); 1468 + return 0; 1469 + } 1470 + 1471 + static ssize_t debugfs_timings_read(struct file *file, char __user *buf, 1472 + size_t len, loff_t *ppos) 1473 + { 1474 + struct debugfs_timings_state *p = file->private_data; 1475 + struct kvm_vcpu *vcpu = p->vcpu; 1476 + char *s, *buf_end; 1477 + struct kvmhv_tb_accumulator tb; 1478 + u64 count; 1479 + loff_t pos; 1480 + ssize_t n; 1481 + int i, loops; 1482 + bool ok; 1483 + 1484 + if (!p->buflen) { 1485 + s = p->buf; 1486 + buf_end = s + sizeof(p->buf); 1487 + for (i = 0; i < N_TIMINGS; ++i) { 1488 + struct kvmhv_tb_accumulator *acc; 1489 + 1490 + acc = 
(struct kvmhv_tb_accumulator *) 1491 + ((unsigned long)vcpu + timings[i].offset); 1492 + ok = false; 1493 + for (loops = 0; loops < 1000; ++loops) { 1494 + count = acc->seqcount; 1495 + if (!(count & 1)) { 1496 + smp_rmb(); 1497 + tb = *acc; 1498 + smp_rmb(); 1499 + if (count == acc->seqcount) { 1500 + ok = true; 1501 + break; 1502 + } 1503 + } 1504 + udelay(1); 1505 + } 1506 + if (!ok) 1507 + snprintf(s, buf_end - s, "%s: stuck\n", 1508 + timings[i].name); 1509 + else 1510 + snprintf(s, buf_end - s, 1511 + "%s: %llu %llu %llu %llu\n", 1512 + timings[i].name, count / 2, 1513 + tb_to_ns(tb.tb_total), 1514 + tb_to_ns(tb.tb_min), 1515 + tb_to_ns(tb.tb_max)); 1516 + s += strlen(s); 1517 + } 1518 + p->buflen = s - p->buf; 1519 + } 1520 + 1521 + pos = *ppos; 1522 + if (pos >= p->buflen) 1523 + return 0; 1524 + if (len > p->buflen - pos) 1525 + len = p->buflen - pos; 1526 + n = copy_to_user(buf, p->buf + pos, len); 1527 + if (n) { 1528 + if (n == len) 1529 + return -EFAULT; 1530 + len -= n; 1531 + } 1532 + *ppos = pos + len; 1533 + return len; 1534 + } 1535 + 1536 + static ssize_t debugfs_timings_write(struct file *file, const char __user *buf, 1537 + size_t len, loff_t *ppos) 1538 + { 1539 + return -EACCES; 1540 + } 1541 + 1542 + static const struct file_operations debugfs_timings_ops = { 1543 + .owner = THIS_MODULE, 1544 + .open = debugfs_timings_open, 1545 + .release = debugfs_timings_release, 1546 + .read = debugfs_timings_read, 1547 + .write = debugfs_timings_write, 1548 + .llseek = generic_file_llseek, 1549 + }; 1550 + 1551 + /* Create a debugfs directory for the vcpu */ 1552 + static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) 1553 + { 1554 + char buf[16]; 1555 + struct kvm *kvm = vcpu->kvm; 1556 + 1557 + snprintf(buf, sizeof(buf), "vcpu%u", id); 1558 + if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir)) 1559 + return; 1560 + vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir); 1561 + if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir)) 1562 + 
return; 1563 + vcpu->arch.debugfs_timings = 1564 + debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir, 1565 + vcpu, &debugfs_timings_ops); 1566 + } 1567 + 1568 + #else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ 1569 + static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id) 1570 + { 1571 + } 1572 + #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ 1573 + 1426 1574 static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, 1427 1575 unsigned int id) 1428 1576 { ··· 1639 1491 1640 1492 vcpu->arch.cpu_type = KVM_CPU_3S_64; 1641 1493 kvmppc_sanity_check(vcpu); 1494 + 1495 + debugfs_vcpu_init(vcpu, id); 1642 1496 1643 1497 return vcpu; 1644 1498
+139 -2
arch/powerpc/kvm/book3s_hv_rmhandlers.S
··· 225 225 /* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */ 226 226 ld r4, HSTATE_KVM_VCPU(r13) 227 227 cmpdi r4, 0 228 - bne kvmppc_got_guest 228 + beq kvmppc_primary_no_guest 229 + 230 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 231 + addi r3, r4, VCPU_TB_RMENTRY 232 + bl kvmhv_start_timing 233 + #endif 234 + b kvmppc_got_guest 229 235 230 236 kvm_novcpu_exit: 231 237 b hdec_soon ··· 382 376 li r6, KVM_GUEST_MODE_HOST_HV 383 377 stb r6, HSTATE_IN_GUEST(r13) 384 378 379 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 380 + /* Store initial timestamp */ 381 + cmpdi r4, 0 382 + beq 1f 383 + addi r3, r4, VCPU_TB_RMENTRY 384 + bl kvmhv_start_timing 385 + 1: 386 + #endif 385 387 /* Clear out SLB */ 386 388 li r6,0 387 389 slbmte r6,r6 ··· 894 880 li r9, KVM_GUEST_MODE_GUEST_HV 895 881 stb r9, HSTATE_IN_GUEST(r13) 896 882 883 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 884 + /* Accumulate timing */ 885 + addi r3, r4, VCPU_TB_GUEST 886 + bl kvmhv_accumulate_time 887 + #endif 888 + 897 889 /* Enter guest */ 898 890 899 891 BEGIN_FTR_SECTION ··· 936 916 937 917 hrfid 938 918 b . 
919 + 920 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 921 + secondary_too_late: 922 + cmpdi r4, 0 923 + beq 11f 924 + addi r3, r4, VCPU_TB_RMEXIT 925 + bl kvmhv_accumulate_time 926 + 11: b kvmhv_switch_to_host 927 + 928 + hdec_soon: 929 + ld r4, HSTATE_KVM_VCPU(r13) 930 + cmpdi r4, 0 931 + beq 12f 932 + addi r3, r4, VCPU_TB_RMEXIT 933 + bl kvmhv_accumulate_time 934 + 12: b kvmhv_do_exit 935 + #endif 939 936 940 937 /****************************************************************************** 941 938 * * ··· 1039 1002 1040 1003 stw r12,VCPU_TRAP(r9) 1041 1004 1005 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1006 + addi r3, r9, VCPU_TB_RMINTR 1007 + mr r4, r9 1008 + bl kvmhv_accumulate_time 1009 + ld r5, VCPU_GPR(R5)(r9) 1010 + ld r6, VCPU_GPR(R6)(r9) 1011 + ld r7, VCPU_GPR(R7)(r9) 1012 + ld r8, VCPU_GPR(R8)(r9) 1013 + #endif 1014 + 1042 1015 /* Save HEIR (HV emulation assist reg) in emul_inst 1043 1016 if this is an HEI (HV emulation interrupt, e40) */ 1044 1017 li r3,KVM_INST_FETCH_FAILED ··· 1120 1073 cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK 1121 1074 beq machine_check_realmode 1122 1075 mc_cont: 1076 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1077 + addi r3, r9, VCPU_TB_RMEXIT 1078 + mr r4, r9 1079 + bl kvmhv_accumulate_time 1080 + #endif 1123 1081 1124 1082 /* Save guest CTRL register, set runlatch to 1 */ 1125 1083 6: mfspr r6,SPRN_CTRLF ··· 1469 1417 slbia 1470 1418 ptesync 1471 1419 1472 - hdec_soon: /* r12 = trap, r13 = paca */ 1420 + #ifndef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1421 + hdec_soon: 1422 + #endif 1423 + kvmhv_do_exit: /* r12 = trap, r13 = paca */ 1473 1424 /* 1474 1425 * POWER7/POWER8 guest -> host partition switch code. 
1475 1426 * We don't have to lock against tlbies but we do ··· 1531 1476 addi r6,r6,PACA_SIZE 1532 1477 bne 42b 1533 1478 1479 + #ifndef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1534 1480 secondary_too_late: 1481 + #endif 1482 + kvmhv_switch_to_host: 1535 1483 /* Secondary threads wait for primary to do partition switch */ 1536 1484 43: ld r5,HSTATE_KVM_VCORE(r13) 1537 1485 ld r4,VCORE_KVM(r5) /* pointer to struct kvm */ ··· 1620 1562 1: addi r8,r8,16 1621 1563 .endr 1622 1564 1565 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 1566 + /* Finish timing, if we have a vcpu */ 1567 + ld r4, HSTATE_KVM_VCPU(r13) 1568 + cmpdi r4, 0 1569 + li r3, 0 1570 + beq 2f 1571 + bl kvmhv_accumulate_time 1572 + 2: 1573 + #endif 1623 1574 /* Unset guest mode */ 1624 1575 li r0, KVM_GUEST_MODE_NONE 1625 1576 stb r0, HSTATE_IN_GUEST(r13) ··· 2136 2069 /* save FP state */ 2137 2070 bl kvmppc_save_fp 2138 2071 2072 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 2073 + ld r4, HSTATE_KVM_VCPU(r13) 2074 + addi r3, r4, VCPU_TB_CEDE 2075 + bl kvmhv_accumulate_time 2076 + #endif 2077 + 2139 2078 /* 2140 2079 * Take a nap until a decrementer or external or doobell interrupt 2141 2080 * occurs, with PECE1, PECE0 and PECEDP set in LPCR. 
Also clear the ··· 2181 2108 2182 2109 /* Woken by external or decrementer interrupt */ 2183 2110 ld r1, HSTATE_HOST_R1(r13) 2111 + 2112 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 2113 + addi r3, r4, VCPU_TB_RMINTR 2114 + bl kvmhv_accumulate_time 2115 + #endif 2184 2116 2185 2117 /* load up FP state */ 2186 2118 bl kvmppc_load_fp ··· 2507 2429 mtspr SPRN_PMC6, r3 2508 2430 isync 2509 2431 blr 2432 + 2433 + #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING 2434 + /* 2435 + * Start timing an activity 2436 + * r3 = pointer to time accumulation struct, r4 = vcpu 2437 + */ 2438 + kvmhv_start_timing: 2439 + ld r5, HSTATE_KVM_VCORE(r13) 2440 + lbz r6, VCORE_IN_GUEST(r5) 2441 + cmpwi r6, 0 2442 + beq 5f /* if in guest, need to */ 2443 + ld r6, VCORE_TB_OFFSET(r5) /* subtract timebase offset */ 2444 + 5: mftb r5 2445 + subf r5, r6, r5 2446 + std r3, VCPU_CUR_ACTIVITY(r4) 2447 + std r5, VCPU_ACTIVITY_START(r4) 2448 + blr 2449 + 2450 + /* 2451 + * Accumulate time to one activity and start another. 2452 + * r3 = pointer to new time accumulation struct, r4 = vcpu 2453 + */ 2454 + kvmhv_accumulate_time: 2455 + ld r5, HSTATE_KVM_VCORE(r13) 2456 + lbz r8, VCORE_IN_GUEST(r5) 2457 + cmpwi r8, 0 2458 + beq 4f /* if in guest, need to */ 2459 + ld r8, VCORE_TB_OFFSET(r5) /* subtract timebase offset */ 2460 + 4: ld r5, VCPU_CUR_ACTIVITY(r4) 2461 + ld r6, VCPU_ACTIVITY_START(r4) 2462 + std r3, VCPU_CUR_ACTIVITY(r4) 2463 + mftb r7 2464 + subf r7, r8, r7 2465 + std r7, VCPU_ACTIVITY_START(r4) 2466 + cmpdi r5, 0 2467 + beqlr 2468 + subf r3, r6, r7 2469 + ld r8, TAS_SEQCOUNT(r5) 2470 + cmpdi r8, 0 2471 + addi r8, r8, 1 2472 + std r8, TAS_SEQCOUNT(r5) 2473 + lwsync 2474 + ld r7, TAS_TOTAL(r5) 2475 + add r7, r7, r3 2476 + std r7, TAS_TOTAL(r5) 2477 + ld r6, TAS_MIN(r5) 2478 + ld r7, TAS_MAX(r5) 2479 + beq 3f 2480 + cmpd r3, r6 2481 + bge 1f 2482 + 3: std r3, TAS_MIN(r5) 2483 + 1: cmpd r3, r7 2484 + ble 2f 2485 + std r3, TAS_MAX(r5) 2486 + 2: lwsync 2487 + addi r8, r8, 1 2488 + std r8, TAS_SEQCOUNT(r5) 
2489 + blr 2490 + #endif