Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

uprobes: turn trace_uprobe's nhit counter into a per-CPU one

trace_uprobe->nhit counter is not incremented atomically, so its value
is questionable when a uprobe is hit on multiple CPUs simultaneously.

Also, incrementing this shared counter from many CPUs causes heavy
cache line bouncing, limiting how well uprobe/uretprobe performance
scales with the number of CPUs.

Solve both problems by making this a per-CPU counter.

Link: https://lore.kernel.org/all/20240813203409.3985398-1-andrii@kernel.org/

Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>

authored by

Andrii Nakryiko and committed by
Masami Hiramatsu (Google)
10cdb82a da3ea350

+21 -3
+21 -3
kernel/trace/trace_uprobe.c
··· 17 17 #include <linux/string.h> 18 18 #include <linux/rculist.h> 19 19 #include <linux/filter.h> 20 + #include <linux/percpu.h> 20 21 21 22 #include "trace_dynevent.h" 22 23 #include "trace_probe.h" ··· 63 62 char *filename; 64 63 unsigned long offset; 65 64 unsigned long ref_ctr_offset; 66 - unsigned long nhit; 65 + unsigned long __percpu *nhits; 67 66 struct trace_probe tp; 68 67 }; 69 68 ··· 338 337 if (!tu) 339 338 return ERR_PTR(-ENOMEM); 340 339 340 + tu->nhits = alloc_percpu(unsigned long); 341 + if (!tu->nhits) { 342 + ret = -ENOMEM; 343 + goto error; 344 + } 345 + 341 346 ret = trace_probe_init(&tu->tp, event, group, true, nargs); 342 347 if (ret < 0) 343 348 goto error; ··· 356 349 return tu; 357 350 358 351 error: 352 + free_percpu(tu->nhits); 359 353 kfree(tu); 360 354 361 355 return ERR_PTR(ret); ··· 370 362 path_put(&tu->path); 371 363 trace_probe_cleanup(&tu->tp); 372 364 kfree(tu->filename); 365 + free_percpu(tu->nhits); 373 366 kfree(tu); 374 367 } 375 368 ··· 824 815 { 825 816 struct dyn_event *ev = v; 826 817 struct trace_uprobe *tu; 818 + unsigned long nhits; 819 + int cpu; 827 820 828 821 if (!is_trace_uprobe(ev)) 829 822 return 0; 830 823 831 824 tu = to_trace_uprobe(ev); 825 + 826 + nhits = 0; 827 + for_each_possible_cpu(cpu) { 828 + nhits += per_cpu(*tu->nhits, cpu); 829 + } 830 + 832 831 seq_printf(m, " %s %-44s %15lu\n", tu->filename, 833 - trace_probe_name(&tu->tp), tu->nhit); 832 + trace_probe_name(&tu->tp), nhits); 834 833 return 0; 835 834 } 836 835 ··· 1529 1512 int ret = 0; 1530 1513 1531 1514 tu = container_of(con, struct trace_uprobe, consumer); 1532 - tu->nhit++; 1515 + 1516 + this_cpu_inc(*tu->nhits); 1533 1517 1534 1518 udd.tu = tu; 1535 1519 udd.bp_addr = instruction_pointer(regs);