Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tracing: Provide trace events interface for uprobes

Implements trace_event support for uprobes. In its current form
it can be used to put probes at a specified offset in a file and
dump the required registers when the code flow reaches the
probed address.

The following example shows how to dump the instruction pointer
and the %ax register at the probed text address. Here we are
trying to probe zfree in /bin/zsh:

# cd /sys/kernel/debug/tracing/
# cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp
00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh
# objdump -T /bin/zsh | grep -w zfree
0000000000446420 g DF .text 0000000000000012 Base zfree
# echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events
# cat uprobe_events
p:uprobes/p_zsh_0x46420 /bin/zsh:0x0000000000046420
# echo 1 > events/uprobes/enable
# sleep 20
# echo 0 > events/uprobes/enable
# cat trace
# tracer: nop
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79
zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79

Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Jim Keniston <jkenisto@linux.vnet.ibm.com>
Cc: Linux-mm <linux-mm@kvack.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Anton Arapov <anton@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20120411103043.GB29437@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Srikar Dronamraju and committed by
Ingo Molnar
f3f096cf 8ab83f56

+919 -7
+95
Documentation/trace/uprobetracer.txt
··· 1 + Uprobe-tracer: Uprobe-based Event Tracing 2 + ========================================= 3 + Documentation written by Srikar Dronamraju 4 + 5 + Overview 6 + -------- 7 + Uprobe based trace events are similar to kprobe based trace events. 8 + To enable this feature, build your kernel with CONFIG_UPROBE_EVENTS=y. 9 + 10 + Similar to the kprobe-event tracer, this doesn't need to be activated via 11 + current_tracer. Instead of that, add probe points via 12 + /sys/kernel/debug/tracing/uprobe_events, and enable it via 13 + /sys/kernel/debug/tracing/events/uprobes/<EVENT>/enabled. 14 + 15 + However unlike kprobe-event tracer, the uprobe event interface expects the 16 + user to calculate the offset of the probepoint in the object 17 + 18 + Synopsis of uprobe_tracer 19 + ------------------------- 20 + p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] : Set a probe 21 + 22 + GRP : Group name. If omitted, use "uprobes" for it. 23 + EVENT : Event name. If omitted, the event name is generated 24 + based on SYMBOL+offs. 25 + PATH : path to an executable or a library. 26 + SYMBOL[+offs] : Symbol+offset where the probe is inserted. 27 + 28 + FETCHARGS : Arguments. Each probe can have up to 128 args. 29 + %REG : Fetch register REG 30 + 31 + Event Profiling 32 + --------------- 33 + You can check the total number of probe hits and probe miss-hits via 34 + /sys/kernel/debug/tracing/uprobe_profile. 35 + The first column is event name, the second is the number of probe hits, 36 + the third is the number of probe miss-hits. 37 + 38 + Usage examples 39 + -------------- 40 + To add a probe as a new event, write a new definition to uprobe_events 41 + as below. 42 + 43 + echo 'p: /bin/bash:0x4245c0' > /sys/kernel/debug/tracing/uprobe_events 44 + 45 + This sets a uprobe at an offset of 0x4245c0 in the executable /bin/bash 46 + 47 + echo > /sys/kernel/debug/tracing/uprobe_events 48 + 49 + This clears all probe points. 
50 + 51 + The following example shows how to dump the instruction pointer and %ax 52 + a register at the probed text address. Here we are trying to probe 53 + function zfree in /bin/zsh 54 + 55 + # cd /sys/kernel/debug/tracing/ 56 + # cat /proc/`pgrep zsh`/maps | grep /bin/zsh | grep r-xp 57 + 00400000-0048a000 r-xp 00000000 08:03 130904 /bin/zsh 58 + # objdump -T /bin/zsh | grep -w zfree 59 + 0000000000446420 g DF .text 0000000000000012 Base zfree 60 + 61 + 0x46420 is the offset of zfree in object /bin/zsh that is loaded at 62 + 0x00400000. Hence the command to probe would be : 63 + 64 + # echo 'p /bin/zsh:0x46420 %ip %ax' > uprobe_events 65 + 66 + Please note: User has to explicitly calculate the offset of the probepoint 67 + in the object. We can see the events that are registered by looking at the 68 + uprobe_events file. 69 + 70 + # cat uprobe_events 71 + p:uprobes/p_zsh_0x46420 /bin/zsh:0x0000000000046420 72 + 73 + Right after definition, each event is disabled by default. For tracing these 74 + events, you need to enable it by: 75 + 76 + # echo 1 > events/uprobes/enable 77 + 78 + Lets disable the event after sleeping for some time. 79 + # sleep 20 80 + # echo 0 > events/uprobes/enable 81 + 82 + And you can see the traced information via /sys/kernel/debug/tracing/trace. 83 + 84 + # cat trace 85 + # tracer: nop 86 + # 87 + # TASK-PID CPU# TIMESTAMP FUNCTION 88 + # | | | | | 89 + zsh-24842 [006] 258544.995456: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 90 + zsh-24842 [007] 258545.000270: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 91 + zsh-24842 [002] 258545.043929: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 92 + zsh-24842 [004] 258547.046129: p_zsh_0x46420: (0x446420) arg1=446421 arg2=79 93 + 94 + Each line shows us probes were triggered for a pid 24842 with ip being 95 + 0x446421 and contents of ax register being 79.
+1 -1
arch/Kconfig
··· 78 78 79 79 config UPROBES 80 80 bool "Transparent user-space probes (EXPERIMENTAL)" 81 - depends on ARCH_SUPPORTS_UPROBES && PERF_EVENTS 81 + depends on UPROBE_EVENTS && PERF_EVENTS 82 82 default n 83 83 help 84 84 Uprobes is the user-space counterpart to kprobes: they
+16
kernel/trace/Kconfig
··· 386 386 This option is also required by perf-probe subcommand of perf tools. 387 387 If you want to use perf tools, this option is strongly recommended. 388 388 389 + config UPROBE_EVENT 390 + bool "Enable uprobes-based dynamic events" 391 + depends on ARCH_SUPPORTS_UPROBES 392 + depends on MMU 393 + select UPROBES 394 + select PROBE_EVENTS 395 + select TRACING 396 + default n 397 + help 398 + This allows the user to add tracing events on top of userspace 399 + dynamic events (similar to tracepoints) on the fly via the trace 400 + events interface. Those events can be inserted wherever uprobes 401 + can probe, and record various registers. 402 + This option is required if you plan to use perf-probe subcommand 403 + of perf tools on user space applications. 404 + 389 405 config PROBE_EVENTS 390 406 def_bool n 391 407
+1
kernel/trace/Makefile
··· 62 62 obj-$(CONFIG_KGDB_KDB) += trace_kdb.o 63 63 endif 64 64 obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o 65 + obj-$(CONFIG_UPROBE_EVENT) += trace_uprobe.o 65 66 66 67 libftrace-y := ftrace.o
+5
kernel/trace/trace.h
··· 103 103 unsigned long ret_ip; 104 104 }; 105 105 106 + struct uprobe_trace_entry_head { 107 + struct trace_entry ent; 108 + unsigned long ip; 109 + }; 110 + 106 111 /* 107 112 * trace_flag_type is an enumeration that holds different 108 113 * states when a trace occurs. These are:
+1 -1
kernel/trace/trace_kprobe.c
··· 525 525 526 526 /* Parse fetch argument */ 527 527 ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i], 528 - is_return); 528 + is_return, true); 529 529 if (ret) { 530 530 pr_info("Parse error at argument[%d]. (%d)\n", i, ret); 531 531 goto error;
+10 -4
kernel/trace/trace_probe.c
··· 550 550 551 551 /* Recursive argument parser */ 552 552 static int parse_probe_arg(char *arg, const struct fetch_type *t, 553 - struct fetch_param *f, bool is_return) 553 + struct fetch_param *f, bool is_return, bool is_kprobe) 554 554 { 555 555 unsigned long param; 556 556 long offset; ··· 558 558 int ret; 559 559 560 560 ret = 0; 561 + 562 + /* Until uprobe_events supports only reg arguments */ 563 + if (!is_kprobe && arg[0] != '%') 564 + return -EINVAL; 565 + 561 566 switch (arg[0]) { 562 567 case '$': 563 568 ret = parse_probe_vars(arg + 1, t, f, is_return); ··· 624 619 return -ENOMEM; 625 620 626 621 dprm->offset = offset; 627 - ret = parse_probe_arg(arg, t2, &dprm->orig, is_return); 622 + ret = parse_probe_arg(arg, t2, &dprm->orig, is_return, 623 + is_kprobe); 628 624 if (ret) 629 625 kfree(dprm); 630 626 else { ··· 683 677 684 678 /* String length checking wrapper */ 685 679 int traceprobe_parse_probe_arg(char *arg, ssize_t *size, 686 - struct probe_arg *parg, bool is_return) 680 + struct probe_arg *parg, bool is_return, bool is_kprobe) 687 681 { 688 682 const char *t; 689 683 int ret; ··· 709 703 } 710 704 parg->offset = *size; 711 705 *size += parg->type->size; 712 - ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return); 706 + ret = parse_probe_arg(arg, parg->type, &parg->fetch, is_return, is_kprobe); 713 707 714 708 if (ret >= 0 && t != NULL) 715 709 ret = __parse_bitfield_probe_arg(t, parg->type, &parg->fetch);
+2 -1
kernel/trace/trace_probe.h
··· 66 66 #define TP_FLAG_TRACE 1 67 67 #define TP_FLAG_PROFILE 2 68 68 #define TP_FLAG_REGISTERED 4 69 + #define TP_FLAG_UPROBE 8 69 70 70 71 71 72 /* data_rloc: data relative location, compatible with u32 */ ··· 144 143 } 145 144 146 145 extern int traceprobe_parse_probe_arg(char *arg, ssize_t *size, 147 - struct probe_arg *parg, bool is_return); 146 + struct probe_arg *parg, bool is_return, bool is_kprobe); 148 147 149 148 extern int traceprobe_conflict_field_name(const char *name, 150 149 struct probe_arg *args, int narg);
+788
kernel/trace/trace_uprobe.c
··· 1 + /* 2 + * uprobes-based tracing events 3 + * 4 + * This program is free software; you can redistribute it and/or modify 5 + * it under the terms of the GNU General Public License version 2 as 6 + * published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope that it will be useful, 9 + * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 + * GNU General Public License for more details. 12 + * 13 + * You should have received a copy of the GNU General Public License 14 + * along with this program; if not, write to the Free Software 15 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 + * 17 + * Copyright (C) IBM Corporation, 2010-2012 18 + * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com> 19 + */ 20 + 21 + #include <linux/module.h> 22 + #include <linux/uaccess.h> 23 + #include <linux/uprobes.h> 24 + #include <linux/namei.h> 25 + 26 + #include "trace_probe.h" 27 + 28 + #define UPROBE_EVENT_SYSTEM "uprobes" 29 + 30 + /* 31 + * uprobe event core functions 32 + */ 33 + struct trace_uprobe; 34 + struct uprobe_trace_consumer { 35 + struct uprobe_consumer cons; 36 + struct trace_uprobe *tu; 37 + }; 38 + 39 + struct trace_uprobe { 40 + struct list_head list; 41 + struct ftrace_event_class class; 42 + struct ftrace_event_call call; 43 + struct uprobe_trace_consumer *consumer; 44 + struct inode *inode; 45 + char *filename; 46 + unsigned long offset; 47 + unsigned long nhit; 48 + unsigned int flags; /* For TP_FLAG_* */ 49 + ssize_t size; /* trace entry size */ 50 + unsigned int nr_args; 51 + struct probe_arg args[]; 52 + }; 53 + 54 + #define SIZEOF_TRACE_UPROBE(n) \ 55 + (offsetof(struct trace_uprobe, args) + \ 56 + (sizeof(struct probe_arg) * (n))) 57 + 58 + static int register_uprobe_event(struct trace_uprobe *tu); 59 + static void unregister_uprobe_event(struct trace_uprobe *tu); 60 + 61 + static 
DEFINE_MUTEX(uprobe_lock); 62 + static LIST_HEAD(uprobe_list); 63 + 64 + static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); 65 + 66 + /* 67 + * Allocate new trace_uprobe and initialize it (including uprobes). 68 + */ 69 + static struct trace_uprobe * 70 + alloc_trace_uprobe(const char *group, const char *event, int nargs) 71 + { 72 + struct trace_uprobe *tu; 73 + 74 + if (!event || !is_good_name(event)) 75 + return ERR_PTR(-EINVAL); 76 + 77 + if (!group || !is_good_name(group)) 78 + return ERR_PTR(-EINVAL); 79 + 80 + tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL); 81 + if (!tu) 82 + return ERR_PTR(-ENOMEM); 83 + 84 + tu->call.class = &tu->class; 85 + tu->call.name = kstrdup(event, GFP_KERNEL); 86 + if (!tu->call.name) 87 + goto error; 88 + 89 + tu->class.system = kstrdup(group, GFP_KERNEL); 90 + if (!tu->class.system) 91 + goto error; 92 + 93 + INIT_LIST_HEAD(&tu->list); 94 + return tu; 95 + 96 + error: 97 + kfree(tu->call.name); 98 + kfree(tu); 99 + 100 + return ERR_PTR(-ENOMEM); 101 + } 102 + 103 + static void free_trace_uprobe(struct trace_uprobe *tu) 104 + { 105 + int i; 106 + 107 + for (i = 0; i < tu->nr_args; i++) 108 + traceprobe_free_probe_arg(&tu->args[i]); 109 + 110 + iput(tu->inode); 111 + kfree(tu->call.class->system); 112 + kfree(tu->call.name); 113 + kfree(tu->filename); 114 + kfree(tu); 115 + } 116 + 117 + static struct trace_uprobe *find_probe_event(const char *event, const char *group) 118 + { 119 + struct trace_uprobe *tu; 120 + 121 + list_for_each_entry(tu, &uprobe_list, list) 122 + if (strcmp(tu->call.name, event) == 0 && 123 + strcmp(tu->call.class->system, group) == 0) 124 + return tu; 125 + 126 + return NULL; 127 + } 128 + 129 + /* Unregister a trace_uprobe and probe_event: call with locking uprobe_lock */ 130 + static void unregister_trace_uprobe(struct trace_uprobe *tu) 131 + { 132 + list_del(&tu->list); 133 + unregister_uprobe_event(tu); 134 + free_trace_uprobe(tu); 135 + } 136 + 137 + /* Register a 
trace_uprobe and probe_event */ 138 + static int register_trace_uprobe(struct trace_uprobe *tu) 139 + { 140 + struct trace_uprobe *old_tp; 141 + int ret; 142 + 143 + mutex_lock(&uprobe_lock); 144 + 145 + /* register as an event */ 146 + old_tp = find_probe_event(tu->call.name, tu->call.class->system); 147 + if (old_tp) 148 + /* delete old event */ 149 + unregister_trace_uprobe(old_tp); 150 + 151 + ret = register_uprobe_event(tu); 152 + if (ret) { 153 + pr_warning("Failed to register probe event(%d)\n", ret); 154 + goto end; 155 + } 156 + 157 + list_add_tail(&tu->list, &uprobe_list); 158 + 159 + end: 160 + mutex_unlock(&uprobe_lock); 161 + 162 + return ret; 163 + } 164 + 165 + /* 166 + * Argument syntax: 167 + * - Add uprobe: p[:[GRP/]EVENT] PATH:SYMBOL[+offs] [FETCHARGS] 168 + * 169 + * - Remove uprobe: -:[GRP/]EVENT 170 + */ 171 + static int create_trace_uprobe(int argc, char **argv) 172 + { 173 + struct trace_uprobe *tu; 174 + struct inode *inode; 175 + char *arg, *event, *group, *filename; 176 + char buf[MAX_EVENT_NAME_LEN]; 177 + struct path path; 178 + unsigned long offset; 179 + bool is_delete; 180 + int i, ret; 181 + 182 + inode = NULL; 183 + ret = 0; 184 + is_delete = false; 185 + event = NULL; 186 + group = NULL; 187 + 188 + /* argc must be >= 1 */ 189 + if (argv[0][0] == '-') 190 + is_delete = true; 191 + else if (argv[0][0] != 'p') { 192 + pr_info("Probe definition must be started with 'p', 'r' or" " '-'.\n"); 193 + return -EINVAL; 194 + } 195 + 196 + if (argv[0][1] == ':') { 197 + event = &argv[0][2]; 198 + arg = strchr(event, '/'); 199 + 200 + if (arg) { 201 + group = event; 202 + event = arg + 1; 203 + event[-1] = '\0'; 204 + 205 + if (strlen(group) == 0) { 206 + pr_info("Group name is not specified\n"); 207 + return -EINVAL; 208 + } 209 + } 210 + if (strlen(event) == 0) { 211 + pr_info("Event name is not specified\n"); 212 + return -EINVAL; 213 + } 214 + } 215 + if (!group) 216 + group = UPROBE_EVENT_SYSTEM; 217 + 218 + if (is_delete) { 219 + if 
(!event) { 220 + pr_info("Delete command needs an event name.\n"); 221 + return -EINVAL; 222 + } 223 + mutex_lock(&uprobe_lock); 224 + tu = find_probe_event(event, group); 225 + 226 + if (!tu) { 227 + mutex_unlock(&uprobe_lock); 228 + pr_info("Event %s/%s doesn't exist.\n", group, event); 229 + return -ENOENT; 230 + } 231 + /* delete an event */ 232 + unregister_trace_uprobe(tu); 233 + mutex_unlock(&uprobe_lock); 234 + return 0; 235 + } 236 + 237 + if (argc < 2) { 238 + pr_info("Probe point is not specified.\n"); 239 + return -EINVAL; 240 + } 241 + if (isdigit(argv[1][0])) { 242 + pr_info("probe point must be have a filename.\n"); 243 + return -EINVAL; 244 + } 245 + arg = strchr(argv[1], ':'); 246 + if (!arg) 247 + goto fail_address_parse; 248 + 249 + *arg++ = '\0'; 250 + filename = argv[1]; 251 + ret = kern_path(filename, LOOKUP_FOLLOW, &path); 252 + if (ret) 253 + goto fail_address_parse; 254 + 255 + ret = strict_strtoul(arg, 0, &offset); 256 + if (ret) 257 + goto fail_address_parse; 258 + 259 + inode = igrab(path.dentry->d_inode); 260 + 261 + argc -= 2; 262 + argv += 2; 263 + 264 + /* setup a probe */ 265 + if (!event) { 266 + char *tail = strrchr(filename, '/'); 267 + char *ptr; 268 + 269 + ptr = kstrdup((tail ? 
tail + 1 : filename), GFP_KERNEL); 270 + if (!ptr) { 271 + ret = -ENOMEM; 272 + goto fail_address_parse; 273 + } 274 + 275 + tail = ptr; 276 + ptr = strpbrk(tail, ".-_"); 277 + if (ptr) 278 + *ptr = '\0'; 279 + 280 + snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset); 281 + event = buf; 282 + kfree(tail); 283 + } 284 + 285 + tu = alloc_trace_uprobe(group, event, argc); 286 + if (IS_ERR(tu)) { 287 + pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu)); 288 + ret = PTR_ERR(tu); 289 + goto fail_address_parse; 290 + } 291 + tu->offset = offset; 292 + tu->inode = inode; 293 + tu->filename = kstrdup(filename, GFP_KERNEL); 294 + 295 + if (!tu->filename) { 296 + pr_info("Failed to allocate filename.\n"); 297 + ret = -ENOMEM; 298 + goto error; 299 + } 300 + 301 + /* parse arguments */ 302 + ret = 0; 303 + for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { 304 + /* Increment count for freeing args in error case */ 305 + tu->nr_args++; 306 + 307 + /* Parse argument name */ 308 + arg = strchr(argv[i], '='); 309 + if (arg) { 310 + *arg++ = '\0'; 311 + tu->args[i].name = kstrdup(argv[i], GFP_KERNEL); 312 + } else { 313 + arg = argv[i]; 314 + /* If argument name is omitted, set "argN" */ 315 + snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1); 316 + tu->args[i].name = kstrdup(buf, GFP_KERNEL); 317 + } 318 + 319 + if (!tu->args[i].name) { 320 + pr_info("Failed to allocate argument[%d] name.\n", i); 321 + ret = -ENOMEM; 322 + goto error; 323 + } 324 + 325 + if (!is_good_name(tu->args[i].name)) { 326 + pr_info("Invalid argument[%d] name: %s\n", i, tu->args[i].name); 327 + ret = -EINVAL; 328 + goto error; 329 + } 330 + 331 + if (traceprobe_conflict_field_name(tu->args[i].name, tu->args, i)) { 332 + pr_info("Argument[%d] name '%s' conflicts with " 333 + "another field.\n", i, argv[i]); 334 + ret = -EINVAL; 335 + goto error; 336 + } 337 + 338 + /* Parse fetch argument */ 339 + ret = traceprobe_parse_probe_arg(arg, &tu->size, &tu->args[i], false, false); 
340 + if (ret) { 341 + pr_info("Parse error at argument[%d]. (%d)\n", i, ret); 342 + goto error; 343 + } 344 + } 345 + 346 + ret = register_trace_uprobe(tu); 347 + if (ret) 348 + goto error; 349 + return 0; 350 + 351 + error: 352 + free_trace_uprobe(tu); 353 + return ret; 354 + 355 + fail_address_parse: 356 + if (inode) 357 + iput(inode); 358 + 359 + pr_info("Failed to parse address.\n"); 360 + 361 + return ret; 362 + } 363 + 364 + static void cleanup_all_probes(void) 365 + { 366 + struct trace_uprobe *tu; 367 + 368 + mutex_lock(&uprobe_lock); 369 + while (!list_empty(&uprobe_list)) { 370 + tu = list_entry(uprobe_list.next, struct trace_uprobe, list); 371 + unregister_trace_uprobe(tu); 372 + } 373 + mutex_unlock(&uprobe_lock); 374 + } 375 + 376 + /* Probes listing interfaces */ 377 + static void *probes_seq_start(struct seq_file *m, loff_t *pos) 378 + { 379 + mutex_lock(&uprobe_lock); 380 + return seq_list_start(&uprobe_list, *pos); 381 + } 382 + 383 + static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos) 384 + { 385 + return seq_list_next(v, &uprobe_list, pos); 386 + } 387 + 388 + static void probes_seq_stop(struct seq_file *m, void *v) 389 + { 390 + mutex_unlock(&uprobe_lock); 391 + } 392 + 393 + static int probes_seq_show(struct seq_file *m, void *v) 394 + { 395 + struct trace_uprobe *tu = v; 396 + int i; 397 + 398 + seq_printf(m, "p:%s/%s", tu->call.class->system, tu->call.name); 399 + seq_printf(m, " %s:0x%p", tu->filename, (void *)tu->offset); 400 + 401 + for (i = 0; i < tu->nr_args; i++) 402 + seq_printf(m, " %s=%s", tu->args[i].name, tu->args[i].comm); 403 + 404 + seq_printf(m, "\n"); 405 + return 0; 406 + } 407 + 408 + static const struct seq_operations probes_seq_op = { 409 + .start = probes_seq_start, 410 + .next = probes_seq_next, 411 + .stop = probes_seq_stop, 412 + .show = probes_seq_show 413 + }; 414 + 415 + static int probes_open(struct inode *inode, struct file *file) 416 + { 417 + if ((file->f_mode & FMODE_WRITE) && (file->f_flags 
& O_TRUNC)) 418 + cleanup_all_probes(); 419 + 420 + return seq_open(file, &probes_seq_op); 421 + } 422 + 423 + static ssize_t probes_write(struct file *file, const char __user *buffer, 424 + size_t count, loff_t *ppos) 425 + { 426 + return traceprobe_probes_write(file, buffer, count, ppos, create_trace_uprobe); 427 + } 428 + 429 + static const struct file_operations uprobe_events_ops = { 430 + .owner = THIS_MODULE, 431 + .open = probes_open, 432 + .read = seq_read, 433 + .llseek = seq_lseek, 434 + .release = seq_release, 435 + .write = probes_write, 436 + }; 437 + 438 + /* Probes profiling interfaces */ 439 + static int probes_profile_seq_show(struct seq_file *m, void *v) 440 + { 441 + struct trace_uprobe *tu = v; 442 + 443 + seq_printf(m, " %s %-44s %15lu\n", tu->filename, tu->call.name, tu->nhit); 444 + return 0; 445 + } 446 + 447 + static const struct seq_operations profile_seq_op = { 448 + .start = probes_seq_start, 449 + .next = probes_seq_next, 450 + .stop = probes_seq_stop, 451 + .show = probes_profile_seq_show 452 + }; 453 + 454 + static int profile_open(struct inode *inode, struct file *file) 455 + { 456 + return seq_open(file, &profile_seq_op); 457 + } 458 + 459 + static const struct file_operations uprobe_profile_ops = { 460 + .owner = THIS_MODULE, 461 + .open = profile_open, 462 + .read = seq_read, 463 + .llseek = seq_lseek, 464 + .release = seq_release, 465 + }; 466 + 467 + /* uprobe handler */ 468 + static void uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs) 469 + { 470 + struct uprobe_trace_entry_head *entry; 471 + struct ring_buffer_event *event; 472 + struct ring_buffer *buffer; 473 + u8 *data; 474 + int size, i, pc; 475 + unsigned long irq_flags; 476 + struct ftrace_event_call *call = &tu->call; 477 + 478 + tu->nhit++; 479 + 480 + local_save_flags(irq_flags); 481 + pc = preempt_count(); 482 + 483 + size = sizeof(*entry) + tu->size; 484 + 485 + event = trace_current_buffer_lock_reserve(&buffer, call->event.type, 486 + size, 
irq_flags, pc); 487 + if (!event) 488 + return; 489 + 490 + entry = ring_buffer_event_data(event); 491 + entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); 492 + data = (u8 *)&entry[1]; 493 + for (i = 0; i < tu->nr_args; i++) 494 + call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); 495 + 496 + if (!filter_current_check_discard(buffer, call, entry, event)) 497 + trace_buffer_unlock_commit(buffer, event, irq_flags, pc); 498 + } 499 + 500 + /* Event entry printers */ 501 + static enum print_line_t 502 + print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event) 503 + { 504 + struct uprobe_trace_entry_head *field; 505 + struct trace_seq *s = &iter->seq; 506 + struct trace_uprobe *tu; 507 + u8 *data; 508 + int i; 509 + 510 + field = (struct uprobe_trace_entry_head *)iter->ent; 511 + tu = container_of(event, struct trace_uprobe, call.event); 512 + 513 + if (!trace_seq_printf(s, "%s: (", tu->call.name)) 514 + goto partial; 515 + 516 + if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET)) 517 + goto partial; 518 + 519 + if (!trace_seq_puts(s, ")")) 520 + goto partial; 521 + 522 + data = (u8 *)&field[1]; 523 + for (i = 0; i < tu->nr_args; i++) { 524 + if (!tu->args[i].type->print(s, tu->args[i].name, 525 + data + tu->args[i].offset, field)) 526 + goto partial; 527 + } 528 + 529 + if (trace_seq_puts(s, "\n")) 530 + return TRACE_TYPE_HANDLED; 531 + 532 + partial: 533 + return TRACE_TYPE_PARTIAL_LINE; 534 + } 535 + 536 + static int probe_event_enable(struct trace_uprobe *tu, int flag) 537 + { 538 + struct uprobe_trace_consumer *utc; 539 + int ret = 0; 540 + 541 + if (!tu->inode || tu->consumer) 542 + return -EINTR; 543 + 544 + utc = kzalloc(sizeof(struct uprobe_trace_consumer), GFP_KERNEL); 545 + if (!utc) 546 + return -EINTR; 547 + 548 + utc->cons.handler = uprobe_dispatcher; 549 + utc->cons.filter = NULL; 550 + ret = uprobe_register(tu->inode, tu->offset, &utc->cons); 551 + if (ret) { 552 + kfree(utc); 553 + return 
ret; 554 + } 555 + 556 + tu->flags |= flag; 557 + utc->tu = tu; 558 + tu->consumer = utc; 559 + 560 + return 0; 561 + } 562 + 563 + static void probe_event_disable(struct trace_uprobe *tu, int flag) 564 + { 565 + if (!tu->inode || !tu->consumer) 566 + return; 567 + 568 + uprobe_unregister(tu->inode, tu->offset, &tu->consumer->cons); 569 + tu->flags &= ~flag; 570 + kfree(tu->consumer); 571 + tu->consumer = NULL; 572 + } 573 + 574 + static int uprobe_event_define_fields(struct ftrace_event_call *event_call) 575 + { 576 + int ret, i; 577 + struct uprobe_trace_entry_head field; 578 + struct trace_uprobe *tu = (struct trace_uprobe *)event_call->data; 579 + 580 + DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); 581 + /* Set argument names as fields */ 582 + for (i = 0; i < tu->nr_args; i++) { 583 + ret = trace_define_field(event_call, tu->args[i].type->fmttype, 584 + tu->args[i].name, 585 + sizeof(field) + tu->args[i].offset, 586 + tu->args[i].type->size, 587 + tu->args[i].type->is_signed, 588 + FILTER_OTHER); 589 + 590 + if (ret) 591 + return ret; 592 + } 593 + return 0; 594 + } 595 + 596 + #define LEN_OR_ZERO (len ? 
len - pos : 0) 597 + static int __set_print_fmt(struct trace_uprobe *tu, char *buf, int len) 598 + { 599 + const char *fmt, *arg; 600 + int i; 601 + int pos = 0; 602 + 603 + fmt = "(%lx)"; 604 + arg = "REC->" FIELD_STRING_IP; 605 + 606 + /* When len=0, we just calculate the needed length */ 607 + 608 + pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt); 609 + 610 + for (i = 0; i < tu->nr_args; i++) { 611 + pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s", 612 + tu->args[i].name, tu->args[i].type->fmt); 613 + } 614 + 615 + pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg); 616 + 617 + for (i = 0; i < tu->nr_args; i++) { 618 + pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", 619 + tu->args[i].name); 620 + } 621 + 622 + return pos; /* return the length of print_fmt */ 623 + } 624 + #undef LEN_OR_ZERO 625 + 626 + static int set_print_fmt(struct trace_uprobe *tu) 627 + { 628 + char *print_fmt; 629 + int len; 630 + 631 + /* First: called with 0 length to calculate the needed length */ 632 + len = __set_print_fmt(tu, NULL, 0); 633 + print_fmt = kmalloc(len + 1, GFP_KERNEL); 634 + if (!print_fmt) 635 + return -ENOMEM; 636 + 637 + /* Second: actually write the @print_fmt */ 638 + __set_print_fmt(tu, print_fmt, len + 1); 639 + tu->call.print_fmt = print_fmt; 640 + 641 + return 0; 642 + } 643 + 644 + #ifdef CONFIG_PERF_EVENTS 645 + /* uprobe profile handler */ 646 + static void uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs) 647 + { 648 + struct ftrace_event_call *call = &tu->call; 649 + struct uprobe_trace_entry_head *entry; 650 + struct hlist_head *head; 651 + u8 *data; 652 + int size, __size, i; 653 + int rctx; 654 + 655 + __size = sizeof(*entry) + tu->size; 656 + size = ALIGN(__size + sizeof(u32), sizeof(u64)); 657 + size -= sizeof(u32); 658 + if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) 659 + return; 660 + 661 + preempt_disable(); 662 + 663 + entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); 
664 + if (!entry) 665 + goto out; 666 + 667 + entry->ip = uprobe_get_swbp_addr(task_pt_regs(current)); 668 + data = (u8 *)&entry[1]; 669 + for (i = 0; i < tu->nr_args; i++) 670 + call_fetch(&tu->args[i].fetch, regs, data + tu->args[i].offset); 671 + 672 + head = this_cpu_ptr(call->perf_events); 673 + perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); 674 + 675 + out: 676 + preempt_enable(); 677 + } 678 + #endif /* CONFIG_PERF_EVENTS */ 679 + 680 + static 681 + int trace_uprobe_register(struct ftrace_event_call *event, enum trace_reg type, void *data) 682 + { 683 + struct trace_uprobe *tu = (struct trace_uprobe *)event->data; 684 + 685 + switch (type) { 686 + case TRACE_REG_REGISTER: 687 + return probe_event_enable(tu, TP_FLAG_TRACE); 688 + 689 + case TRACE_REG_UNREGISTER: 690 + probe_event_disable(tu, TP_FLAG_TRACE); 691 + return 0; 692 + 693 + #ifdef CONFIG_PERF_EVENTS 694 + case TRACE_REG_PERF_REGISTER: 695 + return probe_event_enable(tu, TP_FLAG_PROFILE); 696 + 697 + case TRACE_REG_PERF_UNREGISTER: 698 + probe_event_disable(tu, TP_FLAG_PROFILE); 699 + return 0; 700 + #endif 701 + default: 702 + return 0; 703 + } 704 + return 0; 705 + } 706 + 707 + static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) 708 + { 709 + struct uprobe_trace_consumer *utc; 710 + struct trace_uprobe *tu; 711 + 712 + utc = container_of(con, struct uprobe_trace_consumer, cons); 713 + tu = utc->tu; 714 + if (!tu || tu->consumer != utc) 715 + return 0; 716 + 717 + if (tu->flags & TP_FLAG_TRACE) 718 + uprobe_trace_func(tu, regs); 719 + 720 + #ifdef CONFIG_PERF_EVENTS 721 + if (tu->flags & TP_FLAG_PROFILE) 722 + uprobe_perf_func(tu, regs); 723 + #endif 724 + return 0; 725 + } 726 + 727 + static struct trace_event_functions uprobe_funcs = { 728 + .trace = print_uprobe_event 729 + }; 730 + 731 + static int register_uprobe_event(struct trace_uprobe *tu) 732 + { 733 + struct ftrace_event_call *call = &tu->call; 734 + int ret; 735 + 736 + /* Initialize 
ftrace_event_call */ 737 + INIT_LIST_HEAD(&call->class->fields); 738 + call->event.funcs = &uprobe_funcs; 739 + call->class->define_fields = uprobe_event_define_fields; 740 + 741 + if (set_print_fmt(tu) < 0) 742 + return -ENOMEM; 743 + 744 + ret = register_ftrace_event(&call->event); 745 + if (!ret) { 746 + kfree(call->print_fmt); 747 + return -ENODEV; 748 + } 749 + call->flags = 0; 750 + call->class->reg = trace_uprobe_register; 751 + call->data = tu; 752 + ret = trace_add_event_call(call); 753 + 754 + if (ret) { 755 + pr_info("Failed to register uprobe event: %s\n", call->name); 756 + kfree(call->print_fmt); 757 + unregister_ftrace_event(&call->event); 758 + } 759 + 760 + return ret; 761 + } 762 + 763 + static void unregister_uprobe_event(struct trace_uprobe *tu) 764 + { 765 + /* tu->event is unregistered in trace_remove_event_call() */ 766 + trace_remove_event_call(&tu->call); 767 + kfree(tu->call.print_fmt); 768 + tu->call.print_fmt = NULL; 769 + } 770 + 771 + /* Make a trace interface for controling probe points */ 772 + static __init int init_uprobe_trace(void) 773 + { 774 + struct dentry *d_tracer; 775 + 776 + d_tracer = tracing_init_dentry(); 777 + if (!d_tracer) 778 + return 0; 779 + 780 + trace_create_file("uprobe_events", 0644, d_tracer, 781 + NULL, &uprobe_events_ops); 782 + /* Profile interface */ 783 + trace_create_file("uprobe_profile", 0444, d_tracer, 784 + NULL, &uprobe_profile_ops); 785 + return 0; 786 + } 787 + 788 + fs_initcall(init_uprobe_trace);