Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tracing: fprobe: optimization for entry only case

For now, fgraph is used for the fprobe, even if we only need to trace the
entry. However, the performance of ftrace is better than that of fgraph,
so we can use ftrace_ops for this case.

The performance of kprobe-multi then increases from 54M/s to 69M/s.
Before this commit:

$ ./benchs/run_bench_trigger.sh kprobe-multi
kprobe-multi : 54.663 ± 0.493M/s

After this commit:

$ ./benchs/run_bench_trigger.sh kprobe-multi
kprobe-multi : 69.447 ± 0.143M/s

Mitigations were disabled during the benchmark testing above.

Link: https://lore.kernel.org/all/20251015083238.2374294-2-dongml2@chinatelecom.cn/

Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>

authored by

Menglong Dong and committed by
Masami Hiramatsu (Google)
2c67dc45 e667152e

+119 -9
+119 -9
kernel/trace/fprobe.c
··· 254 254 return ret; 255 255 } 256 256 257 - static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops, 258 - struct ftrace_regs *fregs) 257 + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS 258 + /* ftrace_ops callback, this processes fprobes which have only entry_handler. */ 259 + static void fprobe_ftrace_entry(unsigned long ip, unsigned long parent_ip, 260 + struct ftrace_ops *ops, struct ftrace_regs *fregs) 261 + { 262 + struct fprobe_hlist_node *node; 263 + struct rhlist_head *head, *pos; 264 + struct fprobe *fp; 265 + int bit; 266 + 267 + bit = ftrace_test_recursion_trylock(ip, parent_ip); 268 + if (bit < 0) 269 + return; 270 + 271 + /* 272 + * ftrace_test_recursion_trylock() disables preemption, but 273 + * rhltable_lookup() checks whether rcu_read_lcok is held. 274 + * So we take rcu_read_lock() here. 275 + */ 276 + rcu_read_lock(); 277 + head = rhltable_lookup(&fprobe_ip_table, &ip, fprobe_rht_params); 278 + 279 + rhl_for_each_entry_rcu(node, pos, head, hlist) { 280 + if (node->addr != ip) 281 + break; 282 + fp = READ_ONCE(node->fp); 283 + if (unlikely(!fp || fprobe_disabled(fp) || fp->exit_handler)) 284 + continue; 285 + 286 + if (fprobe_shared_with_kprobes(fp)) 287 + __fprobe_kprobe_handler(ip, parent_ip, fp, fregs, NULL); 288 + else 289 + __fprobe_handler(ip, parent_ip, fp, fregs, NULL); 290 + } 291 + rcu_read_unlock(); 292 + ftrace_test_recursion_unlock(bit); 293 + } 294 + NOKPROBE_SYMBOL(fprobe_ftrace_entry); 295 + 296 + static struct ftrace_ops fprobe_ftrace_ops = { 297 + .func = fprobe_ftrace_entry, 298 + .flags = FTRACE_OPS_FL_SAVE_REGS, 299 + }; 300 + static int fprobe_ftrace_active; 301 + 302 + static int fprobe_ftrace_add_ips(unsigned long *addrs, int num) 303 + { 304 + int ret; 305 + 306 + lockdep_assert_held(&fprobe_mutex); 307 + 308 + ret = ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 0, 0); 309 + if (ret) 310 + return ret; 311 + 312 + if (!fprobe_ftrace_active) { 313 + ret = 
register_ftrace_function(&fprobe_ftrace_ops); 314 + if (ret) { 315 + ftrace_free_filter(&fprobe_ftrace_ops); 316 + return ret; 317 + } 318 + } 319 + fprobe_ftrace_active++; 320 + return 0; 321 + } 322 + 323 + static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num) 324 + { 325 + lockdep_assert_held(&fprobe_mutex); 326 + 327 + fprobe_ftrace_active--; 328 + if (!fprobe_ftrace_active) 329 + unregister_ftrace_function(&fprobe_ftrace_ops); 330 + if (num) 331 + ftrace_set_filter_ips(&fprobe_ftrace_ops, addrs, num, 1, 0); 332 + } 333 + 334 + static bool fprobe_is_ftrace(struct fprobe *fp) 335 + { 336 + return !fp->exit_handler; 337 + } 338 + #else 339 + static int fprobe_ftrace_add_ips(unsigned long *addrs, int num) 340 + { 341 + return -ENOENT; 342 + } 343 + 344 + static void fprobe_ftrace_remove_ips(unsigned long *addrs, int num) 345 + { 346 + } 347 + 348 + static bool fprobe_is_ftrace(struct fprobe *fp) 349 + { 350 + return false; 351 + } 352 + #endif 353 + 354 + /* fgraph_ops callback, this processes fprobes which have exit_handler. */ 355 + static int fprobe_fgraph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops, 356 + struct ftrace_regs *fregs) 259 357 { 260 358 unsigned long *fgraph_data = NULL; 261 359 unsigned long func = trace->func; ··· 390 292 if (node->addr != func) 391 293 continue; 392 294 fp = READ_ONCE(node->fp); 393 - if (fp && !fprobe_disabled(fp)) 295 + if (fp && !fprobe_disabled(fp) && !fprobe_is_ftrace(fp)) 394 296 fp->nmissed++; 395 297 } 396 298 return 0; ··· 410 312 if (node->addr != func) 411 313 continue; 412 314 fp = READ_ONCE(node->fp); 413 - if (!fp || fprobe_disabled(fp)) 315 + if (unlikely(!fp || fprobe_disabled(fp) || fprobe_is_ftrace(fp))) 414 316 continue; 415 317 416 318 data_size = fp->entry_data_size; ··· 438 340 /* If any exit_handler is set, data must be used. 
*/ 439 341 return used != 0; 440 342 } 441 - NOKPROBE_SYMBOL(fprobe_entry); 343 + NOKPROBE_SYMBOL(fprobe_fgraph_entry); 442 344 443 345 static void fprobe_return(struct ftrace_graph_ret *trace, 444 346 struct fgraph_ops *gops, ··· 477 379 NOKPROBE_SYMBOL(fprobe_return); 478 380 479 381 static struct fgraph_ops fprobe_graph_ops = { 480 - .entryfunc = fprobe_entry, 382 + .entryfunc = fprobe_fgraph_entry, 481 383 .retfunc = fprobe_return, 482 384 }; 483 385 static int fprobe_graph_active; ··· 596 498 } while (node == ERR_PTR(-EAGAIN)); 597 499 rhashtable_walk_exit(&iter); 598 500 599 - if (alist.index > 0) 501 + if (alist.index > 0) { 600 502 ftrace_set_filter_ips(&fprobe_graph_ops.ops, 601 503 alist.addrs, alist.index, 1, 0); 504 + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS 505 + ftrace_set_filter_ips(&fprobe_ftrace_ops, 506 + alist.addrs, alist.index, 1, 0); 507 + #endif 508 + } 602 509 mutex_unlock(&fprobe_mutex); 603 510 604 511 kfree(alist.addrs); ··· 836 733 mutex_lock(&fprobe_mutex); 837 734 838 735 hlist_array = fp->hlist_array; 839 - ret = fprobe_graph_add_ips(addrs, num); 736 + if (fprobe_is_ftrace(fp)) 737 + ret = fprobe_ftrace_add_ips(addrs, num); 738 + else 739 + ret = fprobe_graph_add_ips(addrs, num); 740 + 840 741 if (!ret) { 841 742 add_fprobe_hash(fp); 842 743 for (i = 0; i < hlist_array->size; i++) { ··· 936 829 } 937 830 del_fprobe_hash(fp); 938 831 939 - fprobe_graph_remove_ips(addrs, count); 832 + if (fprobe_is_ftrace(fp)) 833 + fprobe_ftrace_remove_ips(addrs, count); 834 + else 835 + fprobe_graph_remove_ips(addrs, count); 940 836 941 837 kfree_rcu(hlist_array, rcu); 942 838 fp->hlist_array = NULL;