Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Benchmarking code execution time inside the kernel
 *
 * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
 *  for licensing details see kernel-base/COPYING
 */
8#ifndef _LINUX_TIME_BENCH_H
9#define _LINUX_TIME_BENCH_H
10
11/* Main structure used for recording a benchmark run */
12struct time_bench_record {
13 uint32_t version_abi;
14 uint32_t loops; /* Requested loop invocations */
15 uint32_t step; /* option for e.g. bulk invocations */
16
17 uint32_t flags; /* Measurements types enabled */
18#define TIME_BENCH_LOOP BIT(0)
19#define TIME_BENCH_TSC BIT(1)
20#define TIME_BENCH_WALLCLOCK BIT(2)
21#define TIME_BENCH_PMU BIT(3)
22
23 uint32_t cpu; /* Used when embedded in time_bench_cpu */
24
25 /* Records */
26 uint64_t invoked_cnt; /* Returned actual invocations */
27 uint64_t tsc_start;
28 uint64_t tsc_stop;
29 struct timespec64 ts_start;
30 struct timespec64 ts_stop;
31 /* PMU counters for instruction and cycles
32 * instructions counter including pipelined instructions
33 */
34 uint64_t pmc_inst_start;
35 uint64_t pmc_inst_stop;
36 /* CPU unhalted clock counter */
37 uint64_t pmc_clk_start;
38 uint64_t pmc_clk_stop;
39
40 /* Result records */
41 uint64_t tsc_interval;
42 uint64_t time_start, time_stop, time_interval; /* in nanosec */
43 uint64_t pmc_inst, pmc_clk;
44
45 /* Derived result records */
46 uint64_t tsc_cycles; // +decimal?
47 uint64_t ns_per_call_quotient, ns_per_call_decimal;
48 uint64_t time_sec;
49 uint32_t time_sec_remainder;
50 uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */
51};
52
53/* For synchronizing parallel CPUs to run concurrently */
54struct time_bench_sync {
55 atomic_t nr_tests_running;
56 struct completion start_event;
57};
58
59/* Keep track of CPUs executing our bench function.
60 *
61 * Embed a time_bench_record for storing info per cpu
62 */
63struct time_bench_cpu {
64 struct time_bench_record rec;
65 struct time_bench_sync *sync; /* back ptr */
66 struct task_struct *task;
67 /* "data" opaque could have been placed in time_bench_sync,
68 * but to avoid any false sharing, place it per CPU
69 */
70 void *data;
71 /* Support masking outsome CPUs, mark if it ran */
72 bool did_bench_run;
73 /* int cpu; // note CPU stored in time_bench_record */
74 int (*bench_func)(struct time_bench_record *record, void *data);
75};
76
/*
 * The TSC assembler code below is not compatible with other archs, and
 * can also fail on guests if the cpu-flags are not correct.
 *
 * The way the TSC reading is used here (many iterations) does not
 * require as high accuracy as described below (in Intel Doc #324264).
 *
 * Consider changing to use get_cycles() (#include <asm/timex.h>).
 */
86
/** TSC (Time-Stamp Counter) based **
 * Recommended reading, to understand the details of reading the TSC
 * accurately:
 *  Intel Doc #324264, "How to Benchmark Code Execution Times on Intel"
 *
 * Consider getting exclusive ownership of the CPU by using:
 *   unsigned long flags;
 *   preempt_disable();
 *   raw_local_irq_save(flags);
 *   _your_code_
 *   raw_local_irq_restore(flags);
 *   preempt_enable();
 *
 * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx"
 *  RDTSC only changes "%rax" and "%rdx", but
 *  CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx)
 */
103static __always_inline uint64_t tsc_start_clock(void)
104{
105 /* See: Intel Doc #324264 */
106 unsigned int hi, lo;
107
108 asm volatile("CPUID\n\t"
109 "RDTSC\n\t"
110 "mov %%edx, %0\n\t"
111 "mov %%eax, %1\n\t"
112 : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
113 //FIXME: on 32bit use clobbered %eax + %edx
114 return ((uint64_t)lo) | (((uint64_t)hi) << 32);
115}
116
117static __always_inline uint64_t tsc_stop_clock(void)
118{
119 /* See: Intel Doc #324264 */
120 unsigned int hi, lo;
121
122 asm volatile("RDTSCP\n\t"
123 "mov %%edx, %0\n\t"
124 "mov %%eax, %1\n\t"
125 "CPUID\n\t"
126 : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
127 return ((uint64_t)lo) | (((uint64_t)hi) << 32);
128}
129
/** Wall-clock based **
 *
 * Previously used: getnstimeofday()
 *  getnstimeofday(&rec->ts_start);
 *  getnstimeofday(&rec->ts_stop);
 *
 * API changed, see: Documentation/core-api/timekeeping.rst
 *  https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday
 *
 * Use ktime_get_real_ts64() instead, which is a direct replacement,
 * but consider using monotonic time (ktime_get_ts64()) and/or a
 * ktime_t based interface (ktime_get()/ktime_get_real()).
 */
143
/** PMU (Performance Monitor Unit) based **
 *
 * Needed for calculating: Instructions Per Cycle (IPC)
 * - The IPC number tells how efficient the CPU pipelining was
 */
/* lookup: perf_event_create_kernel_counter() */

/*
 * Declared here, defined elsewhere.
 * NOTE(review): presumably switches the PMU counters on or off according
 * to @enable and reports success -- confirm against the implementation.
 */
bool time_bench_PMU_config(bool enable);
152
/*
 * Raw reading via rdpmc() using fixed counters
 *
 * From: https://github.com/andikleen/simple-pmu
 */
enum {
	/* Fixed counter indexes */
	FIXED_INST_RETIRED_ANY		= 0,
	FIXED_CPU_CLK_UNHALTED_CORE	= 1,
	FIXED_CPU_CLK_UNHALTED_REF	= 2,

	/* OR'ed with a fixed counter index to form the rdpmc input */
	FIXED_SELECT			= 1U << 30,	/* == 0x40000000 */
};
163
164static __always_inline unsigned int long long p_rdpmc(unsigned int in)
165{
166 unsigned int d, a;
167
168 asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory");
169 return ((unsigned long long)d << 32) | a;
170}
171
/* These PMU counters need to be enabled, but the code for configuring
 * them is not implemented. The current hack is running:
 *  sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko
 */
176/* Reading all pipelined instruction */
177static __always_inline unsigned long long pmc_inst(void)
178{
179 return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY);
180}
181
182/* Reading CPU clock cycles */
183static __always_inline unsigned long long pmc_clk(void)
184{
185 return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE);
186}
187
/*
 * Raw reading via MSR rdmsr() is likely wrong
 * FIXME: How can I know which raw MSR registers are conf for what?
 */
#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */
#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */
#define MSR_IA32_PCM2 0x400000C3

/*
 * Read MSR_IA32_PCM0 through rdmsrq_safe(), handing it @msr_result as
 * the destination argument.
 *
 * Returns whatever rdmsrq_safe() returns, converted to uint64_t.
 * NOTE(review): that is presumably a status code rather than the counter
 * value -- confirm before relying on the return value.
 */
static inline uint64_t msr_inst(unsigned long long *msr_result)
{
	uint64_t ret;

	ret = rdmsrq_safe(MSR_IA32_PCM0, msr_result);
	return ret;
}
198
/** Generic functions **
 *
 * Declared here, defined elsewhere. The @func callback type used below
 * takes a time_bench_record pointer plus a void pointer and returns int.
 */
bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
		     int (*func)(struct time_bench_record *rec, void *data));

bool time_bench_calc_stats(struct time_bench_record *rec);

void time_bench_run_concurrent(uint32_t loops, int step, void *data,
			       /* Support masking out some CPUs */
			       const struct cpumask *mask,
			       struct time_bench_sync *sync,
			       struct time_bench_cpu *cpu_tasks,
			       int (*func)(struct time_bench_record *record,
					   void *data));

void time_bench_print_stats_cpumask(const char *desc,
				    struct time_bench_cpu *cpu_tasks,
				    const struct cpumask *mask);
212
213//FIXME: use rec->flags to select measurement, should be MACRO
214static __always_inline void time_bench_start(struct time_bench_record *rec)
215{
216 //getnstimeofday(&rec->ts_start);
217 ktime_get_real_ts64(&rec->ts_start);
218 if (rec->flags & TIME_BENCH_PMU) {
219 rec->pmc_inst_start = pmc_inst();
220 rec->pmc_clk_start = pmc_clk();
221 }
222 rec->tsc_start = tsc_start_clock();
223}
224
225static __always_inline void time_bench_stop(struct time_bench_record *rec,
226 uint64_t invoked_cnt)
227{
228 rec->tsc_stop = tsc_stop_clock();
229 if (rec->flags & TIME_BENCH_PMU) {
230 rec->pmc_inst_stop = pmc_inst();
231 rec->pmc_clk_stop = pmc_clk();
232 }
233 //getnstimeofday(&rec->ts_stop);
234 ktime_get_real_ts64(&rec->ts_stop);
235 rec->invoked_cnt = invoked_cnt;
236}
237
238#endif /* _LINUX_TIME_BENCH_H */