Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cpufreq: amd-pstate: Add more tracepoints for AMD P-State module

Add frequency, mperf, aperf and tsc to the trace. This can be used
to debug and tune the performance of the AMD P-State driver.

Use the APERF/MPERF counter differences between successive
amd_pstate_update() calls to calculate the CPU frequency.
arch_freq_get_on_cpu() may sleep, so do not use it here.

Signed-off-by: Jinzhou Su <Jinzhou.Su@amd.com>
Co-developed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Authored by Jinzhou Su and committed by Rafael J. Wysocki
23c296fb a1b6f487

+78 -3
+21 -1
drivers/cpufreq/amd-pstate-trace.h
··· 27 27 TP_PROTO(unsigned long min_perf, 28 28 unsigned long target_perf, 29 29 unsigned long capacity, 30 + u64 freq, 31 + u64 mperf, 32 + u64 aperf, 33 + u64 tsc, 30 34 unsigned int cpu_id, 31 35 bool changed, 32 36 bool fast_switch ··· 39 35 TP_ARGS(min_perf, 40 36 target_perf, 41 37 capacity, 38 + freq, 39 + mperf, 40 + aperf, 41 + tsc, 42 42 cpu_id, 43 43 changed, 44 44 fast_switch ··· 52 44 __field(unsigned long, min_perf) 53 45 __field(unsigned long, target_perf) 54 46 __field(unsigned long, capacity) 47 + __field(unsigned long long, freq) 48 + __field(unsigned long long, mperf) 49 + __field(unsigned long long, aperf) 50 + __field(unsigned long long, tsc) 55 51 __field(unsigned int, cpu_id) 56 52 __field(bool, changed) 57 53 __field(bool, fast_switch) ··· 65 53 __entry->min_perf = min_perf; 66 54 __entry->target_perf = target_perf; 67 55 __entry->capacity = capacity; 56 + __entry->freq = freq; 57 + __entry->mperf = mperf; 58 + __entry->aperf = aperf; 59 + __entry->tsc = tsc; 68 60 __entry->cpu_id = cpu_id; 69 61 __entry->changed = changed; 70 62 __entry->fast_switch = fast_switch; 71 63 ), 72 64 73 - TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu cpu_id=%u changed=%s fast_switch=%s", 65 + TP_printk("amd_min_perf=%lu amd_des_perf=%lu amd_max_perf=%lu freq=%llu mperf=%llu aperf=%llu tsc=%llu cpu_id=%u changed=%s fast_switch=%s", 74 66 (unsigned long)__entry->min_perf, 75 67 (unsigned long)__entry->target_perf, 76 68 (unsigned long)__entry->capacity, 69 + (unsigned long long)__entry->freq, 70 + (unsigned long long)__entry->mperf, 71 + (unsigned long long)__entry->aperf, 72 + (unsigned long long)__entry->tsc, 77 73 (unsigned int)__entry->cpu_id, 78 74 (__entry->changed) ? "true" : "false", 79 75 (__entry->fast_switch) ? "true" : "false"
+57 -2
drivers/cpufreq/amd-pstate.c
··· 66 66 static struct cpufreq_driver amd_pstate_driver; 67 67 68 68 /** 69 + * struct amd_aperf_mperf 70 + * @aperf: actual performance frequency clock count 71 + * @mperf: maximum performance frequency clock count 72 + * @tsc: time stamp counter 73 + */ 74 + struct amd_aperf_mperf { 75 + u64 aperf; 76 + u64 mperf; 77 + u64 tsc; 78 + }; 79 + 80 + /** 69 81 * struct amd_cpudata - private CPU data for AMD P-State 70 82 * @cpu: CPU number 71 83 * @req: constraint request to apply ··· 93 81 * @min_freq: the frequency that mapped to lowest_perf 94 82 * @nominal_freq: the frequency that mapped to nominal_perf 95 83 * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf 84 + * @cur: Difference of Aperf/Mperf/tsc count between last and current sample 85 + * @prev: Last Aperf/Mperf/tsc count value read from register 86 + * @freq: current cpu frequency value 96 87 * @boost_supported: check whether the Processor or SBIOS supports boost mode 97 88 * 98 89 * The amd_cpudata is key private data for each CPU thread in AMD P-State, and ··· 117 102 u32 nominal_freq; 118 103 u32 lowest_nonlinear_freq; 119 104 105 + struct amd_aperf_mperf cur; 106 + struct amd_aperf_mperf prev; 107 + 108 + u64 freq; 120 109 bool boost_supported; 121 110 }; 122 111 ··· 230 211 max_perf, fast_switch); 231 212 } 232 213 214 + static inline bool amd_pstate_sample(struct amd_cpudata *cpudata) 215 + { 216 + u64 aperf, mperf, tsc; 217 + unsigned long flags; 218 + 219 + local_irq_save(flags); 220 + rdmsrl(MSR_IA32_APERF, aperf); 221 + rdmsrl(MSR_IA32_MPERF, mperf); 222 + tsc = rdtsc(); 223 + 224 + if (cpudata->prev.mperf == mperf || cpudata->prev.tsc == tsc) { 225 + local_irq_restore(flags); 226 + return false; 227 + } 228 + 229 + local_irq_restore(flags); 230 + 231 + cpudata->cur.aperf = aperf; 232 + cpudata->cur.mperf = mperf; 233 + cpudata->cur.tsc = tsc; 234 + cpudata->cur.aperf -= cpudata->prev.aperf; 235 + cpudata->cur.mperf -= cpudata->prev.mperf; 236 + cpudata->cur.tsc -= 
cpudata->prev.tsc; 237 + 238 + cpudata->prev.aperf = aperf; 239 + cpudata->prev.mperf = mperf; 240 + cpudata->prev.tsc = tsc; 241 + 242 + cpudata->freq = div64_u64((cpudata->cur.aperf * cpu_khz), cpudata->cur.mperf); 243 + 244 + return true; 245 + } 246 + 233 247 static void amd_pstate_update(struct amd_cpudata *cpudata, u32 min_perf, 234 248 u32 des_perf, u32 max_perf, bool fast_switch) 235 249 { ··· 278 226 value &= ~AMD_CPPC_MAX_PERF(~0L); 279 227 value |= AMD_CPPC_MAX_PERF(max_perf); 280 228 281 - trace_amd_pstate_perf(min_perf, des_perf, max_perf, 282 - cpudata->cpu, (value != prev), fast_switch); 229 + if (trace_amd_pstate_perf_enabled() && amd_pstate_sample(cpudata)) { 230 + trace_amd_pstate_perf(min_perf, des_perf, max_perf, cpudata->freq, 231 + cpudata->cur.mperf, cpudata->cur.aperf, cpudata->cur.tsc, 232 + cpudata->cpu, (value != prev), fast_switch); 233 + } 283 234 284 235 if (value == prev) 285 236 return;