Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

KVM: PPC: Book3S HV nestedv2: Add support for reading VPA counters for pseries guests

PAPR hypervisor has introduced three new counters in the VPA area of
LPAR CPUs for KVM L2 guest (see [1] for terminology) observability - two
for context switches from host to guest and vice versa, and one counter
for getting the total time spent inside the KVM guest. Add a tracepoint
that enables reading the counters for use by ftrace/perf. Note that this
tracepoint is only available for the nestedv2 API (i.e., KVM on PowerVM).

[1] Terminology:
a. L1 refers to the VM (LPAR) booted on top of PAPR hypervisor
b. L2 refers to the KVM guest booted on top of L1.

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Acked-by: Naveen N Rao <naveen@kernel.org>
Signed-off-by: Vaibhav Jain <vaibhav@linux.ibm.com>
Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20240520175742.196329-1-gautam@linux.ibm.com

authored by

Gautam Menghani and committed by
Michael Ellerman
e1f288d2 c3f38fa6

+117 -3
+5
arch/powerpc/include/asm/kvm_book3s_64.h
int kvmhv_nestedv2_parse_output(struct kvm_vcpu *vcpu);
int kvmhv_nestedv2_set_vpa(struct kvm_vcpu *vcpu, unsigned long vpa);

/*
 * Register/unregister hooks for the kvmppc_vcpu_stats tracepoint: they
 * flip the L2-counter enable byte in every present CPU's VPA when the
 * tracepoint is attached/detached, so the hypervisor only maintains the
 * counters while someone is listening.
 *
 * NOTE(review): "kmvhv" looks like a transposition of "kvmhv", but the
 * spelling is used consistently at the definition site and by the
 * tracepoint, so it is kept as-is here.
 */
int kmvhv_counters_tracepoint_regfunc(void);
void kmvhv_counters_tracepoint_unregfunc(void);

/* Query / set the per-CPU "L2 counters enabled" flag in the VPA. */
int kvmhv_get_l2_counters_status(void);
void kvmhv_set_l2_counters_status(int cpu, bool status);

#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

#endif /* __ASM_KVM_BOOK3S_64_H__ */
+8 -3
arch/powerpc/include/asm/lppaca.h
··· 62 62 u8 donate_dedicated_cpu; /* Donate dedicated CPU cycles */ 63 63 u8 fpregs_in_use; 64 64 u8 pmcregs_in_use; 65 - u8 reserved8[28]; 65 + u8 l2_counters_enable; /* Enable usage of counters for KVM guest */ 66 + u8 reserved8[27]; 66 67 __be64 wait_state_cycles; /* Wait cycles for this proc */ 67 68 u8 reserved9[28]; 68 69 __be16 slb_count; /* # of SLBs to maintain */ ··· 93 92 /* cacheline 4-5 */ 94 93 95 94 __be32 page_ins; /* CMO Hint - # page ins by OS */ 96 - u8 reserved12[148]; 95 + u8 reserved12[28]; 96 + volatile __be64 l1_to_l2_cs_tb; 97 + volatile __be64 l2_to_l1_cs_tb; 98 + volatile __be64 l2_runtime_tb; 99 + u8 reserved13[96]; 97 100 volatile __be64 dtl_idx; /* Dispatch Trace Log head index */ 98 - u8 reserved13[96]; 101 + u8 reserved14[96]; 99 102 } ____cacheline_aligned; 100 103 101 104 #define lppaca_of(cpu) (*paca_ptrs[cpu]->lppaca_ptr)
+75
arch/powerpc/kvm/book3s_hv.c
··· 4108 4108 } 4109 4109 } 4110 4110 4111 + /* Helper functions for reading L2's stats from L1's VPA */ 4112 + #ifdef CONFIG_PPC_PSERIES 4113 + static DEFINE_PER_CPU(u64, l1_to_l2_cs); 4114 + static DEFINE_PER_CPU(u64, l2_to_l1_cs); 4115 + static DEFINE_PER_CPU(u64, l2_runtime_agg); 4116 + 4117 + int kvmhv_get_l2_counters_status(void) 4118 + { 4119 + return firmware_has_feature(FW_FEATURE_LPAR) && 4120 + get_lppaca()->l2_counters_enable; 4121 + } 4122 + 4123 + void kvmhv_set_l2_counters_status(int cpu, bool status) 4124 + { 4125 + if (!firmware_has_feature(FW_FEATURE_LPAR)) 4126 + return; 4127 + if (status) 4128 + lppaca_of(cpu).l2_counters_enable = 1; 4129 + else 4130 + lppaca_of(cpu).l2_counters_enable = 0; 4131 + } 4132 + 4133 + int kmvhv_counters_tracepoint_regfunc(void) 4134 + { 4135 + int cpu; 4136 + 4137 + for_each_present_cpu(cpu) { 4138 + kvmhv_set_l2_counters_status(cpu, true); 4139 + } 4140 + return 0; 4141 + } 4142 + 4143 + void kmvhv_counters_tracepoint_unregfunc(void) 4144 + { 4145 + int cpu; 4146 + 4147 + for_each_present_cpu(cpu) { 4148 + kvmhv_set_l2_counters_status(cpu, false); 4149 + } 4150 + } 4151 + 4152 + static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu) 4153 + { 4154 + struct lppaca *lp = get_lppaca(); 4155 + u64 l1_to_l2_ns, l2_to_l1_ns, l2_runtime_ns; 4156 + u64 *l1_to_l2_cs_ptr = this_cpu_ptr(&l1_to_l2_cs); 4157 + u64 *l2_to_l1_cs_ptr = this_cpu_ptr(&l2_to_l1_cs); 4158 + u64 *l2_runtime_agg_ptr = this_cpu_ptr(&l2_runtime_agg); 4159 + 4160 + l1_to_l2_ns = tb_to_ns(be64_to_cpu(lp->l1_to_l2_cs_tb)); 4161 + l2_to_l1_ns = tb_to_ns(be64_to_cpu(lp->l2_to_l1_cs_tb)); 4162 + l2_runtime_ns = tb_to_ns(be64_to_cpu(lp->l2_runtime_tb)); 4163 + trace_kvmppc_vcpu_stats(vcpu, l1_to_l2_ns - *l1_to_l2_cs_ptr, 4164 + l2_to_l1_ns - *l2_to_l1_cs_ptr, 4165 + l2_runtime_ns - *l2_runtime_agg_ptr); 4166 + *l1_to_l2_cs_ptr = l1_to_l2_ns; 4167 + *l2_to_l1_cs_ptr = l2_to_l1_ns; 4168 + *l2_runtime_agg_ptr = l2_runtime_ns; 4169 + } 4170 + 4171 + #else 4172 + 
int kvmhv_get_l2_counters_status(void) 4173 + { 4174 + return 0; 4175 + } 4176 + 4177 + static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu) 4178 + { 4179 + } 4180 + #endif 4181 + 4111 4182 static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit, 4112 4183 unsigned long lpcr, u64 *tb) 4113 4184 { ··· 4226 4155 return -EINVAL; 4227 4156 4228 4157 timer_rearm_host_dec(*tb); 4158 + 4159 + /* Record context switch and guest_run_time data */ 4160 + if (kvmhv_get_l2_counters_status()) 4161 + do_trace_nested_cs_time(vcpu); 4229 4162 4230 4163 return trap; 4231 4164 }
+29
arch/powerpc/kvm/trace_hv.h
		__entry->vcpu_id, __entry->exit, __entry->ret)
);

#ifdef CONFIG_PPC_PSERIES

/*
 * Per-vcpu L2 statistics read from the L1 VPA counters (nestedv2 API,
 * i.e. KVM on PowerVM, only).  The reg/unreg functions enable the VPA
 * counters only while the tracepoint is attached, and TP_CONDITION
 * suppresses all-zero samples.
 */
TRACE_EVENT_FN_COND(kvmppc_vcpu_stats,
	TP_PROTO(struct kvm_vcpu *vcpu, u64 l1_to_l2_cs, u64 l2_to_l1_cs, u64 l2_runtime),

	TP_ARGS(vcpu, l1_to_l2_cs, l2_to_l1_cs, l2_runtime),

	TP_CONDITION(l1_to_l2_cs || l2_to_l1_cs || l2_runtime),

	TP_STRUCT__entry(
		__field(int, vcpu_id)
		__field(u64, l1_to_l2_cs)
		__field(u64, l2_to_l1_cs)
		__field(u64, l2_runtime)
	),

	TP_fast_assign(
		__entry->vcpu_id = vcpu->vcpu_id;
		__entry->l1_to_l2_cs = l1_to_l2_cs;
		__entry->l2_to_l1_cs = l2_to_l1_cs;
		__entry->l2_runtime = l2_runtime;
	),

	TP_printk("VCPU %d: l1_to_l2_cs_time=%llu ns l2_to_l1_cs_time=%llu ns l2_runtime=%llu ns",
		__entry->vcpu_id, __entry->l1_to_l2_cs,
		__entry->l2_to_l1_cs, __entry->l2_runtime),

	kmvhv_counters_tracepoint_regfunc, kmvhv_counters_tracepoint_unregfunc
);
#endif
#endif /* _TRACE_KVM_HV_H */

/* This part must be outside protection */