Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tracepoint: Optimize using static_call()

Currently the tracepoint site iterates a vector and issues one indirect
call per registered handler (i.e. as many indirect calls as the vector
is long).

Using static_call() it is possible to optimize this for the common
case of only having a single handler registered. In this case the
static_call() can directly call this handler. Otherwise, if the vector
holds more than one handler, the static_call() calls a function that
iterates the whole vector, as the current code does.

[peterz: updated to new interface]

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20200818135805.279421092@infradead.org

authored by

Steven Rostedt (VMware) and committed by
Ingo Molnar
d25e37d8 a945c834

+94 -36
+5
include/linux/tracepoint-defs.h
··· 11 11 #include <linux/atomic.h> 12 12 #include <linux/static_key.h> 13 13 14 + struct static_call_key; 15 + 14 16 struct trace_print_flags { 15 17 unsigned long mask; 16 18 const char *name; ··· 32 30 struct tracepoint { 33 31 const char *name; /* Tracepoint name */ 34 32 struct static_key key; 33 + struct static_call_key *static_call_key; 34 + void *static_call_tramp; 35 + void *iterator; 35 36 int (*regfunc)(void); 36 37 void (*unregfunc)(void); 37 38 struct tracepoint_func __rcu *funcs;
+61 -25
include/linux/tracepoint.h
··· 19 19 #include <linux/cpumask.h> 20 20 #include <linux/rcupdate.h> 21 21 #include <linux/tracepoint-defs.h> 22 + #include <linux/static_call.h> 22 23 23 24 struct module; 24 25 struct tracepoint; ··· 93 92 extern void syscall_unregfunc(void); 94 93 #endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ 95 94 95 + #ifndef PARAMS 96 96 #define PARAMS(args...) args 97 + #endif 97 98 98 99 #define TRACE_DEFINE_ENUM(x) 99 100 #define TRACE_DEFINE_SIZEOF(x) ··· 151 148 152 149 #ifdef TRACEPOINTS_ENABLED 153 150 151 + #ifdef CONFIG_HAVE_STATIC_CALL 152 + #define __DO_TRACE_CALL(name) static_call(tp_func_##name) 153 + #else 154 + #define __DO_TRACE_CALL(name) __tracepoint_iter_##name 155 + #endif /* CONFIG_HAVE_STATIC_CALL */ 156 + 154 157 /* 155 158 * it_func[0] is never NULL because there is at least one element in the array 156 159 * when the array itself is non NULL. ··· 166 157 * has a "void" prototype, then it is invalid to declare a function 167 158 * as "(void *, void)". 168 159 */ 169 - #define __DO_TRACE(tp, proto, args, cond, rcuidle) \ 160 + #define __DO_TRACE(name, proto, args, cond, rcuidle) \ 170 161 do { \ 171 162 struct tracepoint_func *it_func_ptr; \ 172 - void *it_func; \ 173 - void *__data; \ 174 163 int __maybe_unused __idx = 0; \ 164 + void *__data; \ 175 165 \ 176 166 if (!(cond)) \ 177 167 return; \ ··· 190 182 rcu_irq_enter_irqson(); \ 191 183 } \ 192 184 \ 193 - it_func_ptr = rcu_dereference_raw((tp)->funcs); \ 194 - \ 185 + it_func_ptr = \ 186 + rcu_dereference_raw((&__tracepoint_##name)->funcs); \ 195 187 if (it_func_ptr) { \ 196 - do { \ 197 - it_func = (it_func_ptr)->func; \ 198 - __data = (it_func_ptr)->data; \ 199 - ((void(*)(proto))(it_func))(args); \ 200 - } while ((++it_func_ptr)->func); \ 188 + __data = (it_func_ptr)->data; \ 189 + __DO_TRACE_CALL(name)(args); \ 201 190 } \ 202 191 \ 203 192 if (rcuidle) { \ ··· 210 205 static inline void trace_##name##_rcuidle(proto) \ 211 206 { \ 212 207 if (static_key_false(&__tracepoint_##name.key)) \ 213 
- __DO_TRACE(&__tracepoint_##name, \ 208 + __DO_TRACE(name, \ 214 209 TP_PROTO(data_proto), \ 215 210 TP_ARGS(data_args), \ 216 211 TP_CONDITION(cond), 1); \ ··· 232 227 * poking RCU a bit. 233 228 */ 234 229 #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ 230 + extern int __tracepoint_iter_##name(data_proto); \ 231 + DECLARE_STATIC_CALL(tp_func_##name, __tracepoint_iter_##name); \ 235 232 extern struct tracepoint __tracepoint_##name; \ 236 233 static inline void trace_##name(proto) \ 237 234 { \ 238 235 if (static_key_false(&__tracepoint_##name.key)) \ 239 - __DO_TRACE(&__tracepoint_##name, \ 236 + __DO_TRACE(name, \ 240 237 TP_PROTO(data_proto), \ 241 238 TP_ARGS(data_args), \ 242 239 TP_CONDITION(cond), 0); \ ··· 284 277 * structures, so we create an array of pointers that will be used for iteration 285 278 * on the tracepoints. 286 279 */ 287 - #define DEFINE_TRACE_FN(name, reg, unreg) \ 288 - static const char __tpstrtab_##name[] \ 289 - __section(__tracepoints_strings) = #name; \ 290 - struct tracepoint __tracepoint_##name __used \ 291 - __section(__tracepoints) = \ 292 - { __tpstrtab_##name, STATIC_KEY_INIT_FALSE, reg, unreg, NULL };\ 293 - __TRACEPOINT_ENTRY(name); 280 + #define DEFINE_TRACE_FN(_name, _reg, _unreg, proto, args) \ 281 + static const char __tpstrtab_##_name[] \ 282 + __section(__tracepoints_strings) = #_name; \ 283 + extern struct static_call_key STATIC_CALL_KEY(tp_func_##_name); \ 284 + int __tracepoint_iter_##_name(void *__data, proto); \ 285 + struct tracepoint __tracepoint_##_name __used \ 286 + __section(__tracepoints) = { \ 287 + .name = __tpstrtab_##_name, \ 288 + .key = STATIC_KEY_INIT_FALSE, \ 289 + .static_call_key = &STATIC_CALL_KEY(tp_func_##_name), \ 290 + .static_call_tramp = STATIC_CALL_TRAMP_ADDR(tp_func_##_name), \ 291 + .iterator = &__tracepoint_iter_##_name, \ 292 + .regfunc = _reg, \ 293 + .unregfunc = _unreg, \ 294 + .funcs = NULL }; \ 295 + __TRACEPOINT_ENTRY(_name); \ 296 + int 
__tracepoint_iter_##_name(void *__data, proto) \ 297 + { \ 298 + struct tracepoint_func *it_func_ptr; \ 299 + void *it_func; \ 300 + \ 301 + it_func_ptr = \ 302 + rcu_dereference_raw((&__tracepoint_##_name)->funcs); \ 303 + do { \ 304 + it_func = (it_func_ptr)->func; \ 305 + __data = (it_func_ptr)->data; \ 306 + ((void(*)(void *, proto))(it_func))(__data, args); \ 307 + } while ((++it_func_ptr)->func); \ 308 + return 0; \ 309 + } \ 310 + DEFINE_STATIC_CALL(tp_func_##_name, __tracepoint_iter_##_name); 294 311 295 - #define DEFINE_TRACE(name) \ 296 - DEFINE_TRACE_FN(name, NULL, NULL); 312 + #define DEFINE_TRACE(name, proto, args) \ 313 + DEFINE_TRACE_FN(name, NULL, NULL, PARAMS(proto), PARAMS(args)); 297 314 298 315 #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) \ 299 - EXPORT_SYMBOL_GPL(__tracepoint_##name) 316 + EXPORT_SYMBOL_GPL(__tracepoint_##name); \ 317 + EXPORT_SYMBOL_GPL(__tracepoint_iter_##name); \ 318 + EXPORT_STATIC_CALL_GPL(tp_func_##name) 300 319 #define EXPORT_TRACEPOINT_SYMBOL(name) \ 301 - EXPORT_SYMBOL(__tracepoint_##name) 320 + EXPORT_SYMBOL(__tracepoint_##name); \ 321 + EXPORT_SYMBOL(__tracepoint_iter_##name); \ 322 + EXPORT_STATIC_CALL(tp_func_##name) 323 + 302 324 303 325 #else /* !TRACEPOINTS_ENABLED */ 304 326 #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ ··· 356 320 return false; \ 357 321 } 358 322 359 - #define DEFINE_TRACE_FN(name, reg, unreg) 360 - #define DEFINE_TRACE(name) 323 + #define DEFINE_TRACE_FN(name, reg, unreg, proto, args) 324 + #define DEFINE_TRACE(name, proto, args) 361 325 #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) 362 326 #define EXPORT_TRACEPOINT_SYMBOL(name) 363 327
+7 -7
include/trace/define_trace.h
··· 25 25 26 26 #undef TRACE_EVENT 27 27 #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ 28 - DEFINE_TRACE(name) 28 + DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) 29 29 30 30 #undef TRACE_EVENT_CONDITION 31 31 #define TRACE_EVENT_CONDITION(name, proto, args, cond, tstruct, assign, print) \ ··· 39 39 #undef TRACE_EVENT_FN 40 40 #define TRACE_EVENT_FN(name, proto, args, tstruct, \ 41 41 assign, print, reg, unreg) \ 42 - DEFINE_TRACE_FN(name, reg, unreg) 42 + DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) 43 43 44 44 #undef TRACE_EVENT_FN_COND 45 45 #define TRACE_EVENT_FN_COND(name, proto, args, cond, tstruct, \ 46 46 assign, print, reg, unreg) \ 47 - DEFINE_TRACE_FN(name, reg, unreg) 47 + DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) 48 48 49 49 #undef TRACE_EVENT_NOP 50 50 #define TRACE_EVENT_NOP(name, proto, args, struct, assign, print) ··· 54 54 55 55 #undef DEFINE_EVENT 56 56 #define DEFINE_EVENT(template, name, proto, args) \ 57 - DEFINE_TRACE(name) 57 + DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) 58 58 59 59 #undef DEFINE_EVENT_FN 60 60 #define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg) \ 61 - DEFINE_TRACE_FN(name, reg, unreg) 61 + DEFINE_TRACE_FN(name, reg, unreg, PARAMS(proto), PARAMS(args)) 62 62 63 63 #undef DEFINE_EVENT_PRINT 64 64 #define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ 65 - DEFINE_TRACE(name) 65 + DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) 66 66 67 67 #undef DEFINE_EVENT_CONDITION 68 68 #define DEFINE_EVENT_CONDITION(template, name, proto, args, cond) \ ··· 70 70 71 71 #undef DECLARE_TRACE 72 72 #define DECLARE_TRACE(name, proto, args) \ 73 - DEFINE_TRACE(name) 73 + DEFINE_TRACE(name, PARAMS(proto), PARAMS(args)) 74 74 75 75 #undef TRACE_INCLUDE 76 76 #undef __TRACE_INCLUDE
+21 -4
kernel/tracepoint.c
··· 221 221 return old; 222 222 } 223 223 224 + static void tracepoint_update_call(struct tracepoint *tp, struct tracepoint_func *tp_funcs) 225 + { 226 + void *func = tp->iterator; 227 + 228 + /* Synthetic events do not have static call sites */ 229 + if (!tp->static_call_key) 230 + return; 231 + 232 + if (!tp_funcs[1].func) 233 + func = tp_funcs[0].func; 234 + 235 + __static_call_update(tp->static_call_key, tp->static_call_tramp, func); 236 + } 237 + 224 238 /* 225 239 * Add the probe function to a tracepoint. 226 240 */ ··· 265 251 * include/linux/tracepoint.h using rcu_dereference_sched(). 266 252 */ 267 253 rcu_assign_pointer(tp->funcs, tp_funcs); 268 - if (!static_key_enabled(&tp->key)) 269 - static_key_slow_inc(&tp->key); 254 + tracepoint_update_call(tp, tp_funcs); 255 + static_key_enable(&tp->key); 256 + 270 257 release_probes(old); 271 258 return 0; 272 259 } ··· 296 281 if (tp->unregfunc && static_key_enabled(&tp->key)) 297 282 tp->unregfunc(); 298 283 299 - if (static_key_enabled(&tp->key)) 300 - static_key_slow_dec(&tp->key); 284 + static_key_disable(&tp->key); 285 + } else { 286 + tracepoint_update_call(tp, tp_funcs); 301 287 } 288 + 302 289 rcu_assign_pointer(tp->funcs, tp_funcs); 303 290 release_probes(old); 304 291 return 0;