Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'probes-v6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace

Pull probes updates from Masami Hiramatsu:
"Stack usage reduction for probe events:
- Allocate string buffers from the heap for uprobe, eprobe, kprobe,
and fprobe events to avoid stack overflow
- Allocate traceprobe_parse_context from the heap to prevent
potential stack overflow
- Fix a typo in the above commit

New features for eprobe and tprobe events:
- Add support for arrays in eprobes
- Support multiple tprobes on the same tracepoint

Improve efficiency:
- Register fprobe-events only when it is enabled to reduce overhead
- Register tracepoints for tprobe events only when enabled to resolve
a lock dependency

Code Cleanup:
- Add kerneldoc for traceprobe_parse_event_name() and
__get_insn_slot()
- Sort #include alphabetically in the probes code
- Remove the unused 'mod' field from the tprobe-event
- Clean up the entry-arg storing code in probe-events

Selftest update:
- Enable fprobe events before checking enable_functions in selftests"

* tag 'probes-v6.17' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
tracing: trace_fprobe: Fix typo of the semicolon
tracing: Have eprobes handle arrays
tracing: probes: Add a kerneldoc for traceprobe_parse_event_name()
tracing: uprobe-event: Allocate string buffers from heap
tracing: eprobe-event: Allocate string buffers from heap
tracing: kprobe-event: Allocate string buffers from heap
tracing: fprobe-event: Allocate string buffers from heap
tracing: probe: Allocate traceprobe_parse_context from heap
tracing: probes: Sort #include alphabetically
kprobes: Add missing kerneldoc for __get_insn_slot
tracing: tprobe-events: Register tracepoint when enable tprobe event
selftests: tracing: Enable fprobe events before checking enable_functions
tracing: fprobe-events: Register fprobe-events only when it is enabled
tracing: tprobe-events: Support multiple tprobes on the same tracepoint
tracing: tprobe-events: Remove mod field from tprobe-event
tracing: probe-events: Cleanup entry-arg storing code

+676 -353
+5
include/linux/fprobe.h
··· 94 94 int register_fprobe_syms(struct fprobe *fp, const char **syms, int num); 95 95 int unregister_fprobe(struct fprobe *fp); 96 96 bool fprobe_is_registered(struct fprobe *fp); 97 + int fprobe_count_ips_from_filter(const char *filter, const char *notfilter); 97 98 #else 98 99 static inline int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter) 99 100 { ··· 115 114 static inline bool fprobe_is_registered(struct fprobe *fp) 116 115 { 117 116 return false; 117 + } 118 + static inline int fprobe_count_ips_from_filter(const char *filter, const char *notfilter) 119 + { 120 + return -EOPNOTSUPP; 118 121 } 119 122 #endif 120 123
+4
include/linux/module.h
··· 14 14 #include <linux/buildid.h> 15 15 #include <linux/compiler.h> 16 16 #include <linux/cache.h> 17 + #include <linux/cleanup.h> 17 18 #include <linux/kmod.h> 18 19 #include <linux/init.h> 19 20 #include <linux/elf.h> ··· 1018 1017 } 1019 1018 1020 1019 #endif /* CONFIG_MODULES && CONFIG_KALLSYMS */ 1020 + 1021 + /* Define __free(module_put) macro for struct module *. */ 1022 + DEFINE_FREE(module_put, struct module *, if (_T) module_put(_T)) 1021 1023 1022 1024 #endif /* _LINUX_MODULE_H */
+6 -2
kernel/kprobes.c
··· 135 135 static int collect_garbage_slots(struct kprobe_insn_cache *c); 136 136 137 137 /** 138 - * __get_insn_slot() - Find a slot on an executable page for an instruction. 139 - * We allocate an executable page if there's no room on existing ones. 138 + * __get_insn_slot - Find a slot on an executable page for an instruction. 139 + * @c: Pointer to kprobe instruction cache 140 + * 141 + * Description: Locates available slot on existing executable pages, 142 + * allocates an executable page if there's no room on existing ones. 143 + * Return: Pointer to instruction slot on success, NULL on failure. 140 144 */ 141 145 kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c) 142 146 {
+5
kernel/trace/fprobe.c
··· 648 648 649 649 #define FPROBE_IPS_MAX INT_MAX 650 650 651 + int fprobe_count_ips_from_filter(const char *filter, const char *notfilter) 652 + { 653 + return get_ips_from_filter(filter, notfilter, NULL, NULL, FPROBE_IPS_MAX); 654 + } 655 + 651 656 /** 652 657 * register_fprobe() - Register fprobe to ftrace by pattern. 653 658 * @fp: A fprobe data structure to be registered.
+37 -16
kernel/trace/trace_eprobe.c
··· 9 9 * Copyright (C) 2021, VMware Inc, Tzvetomir Stoyanov tz.stoyanov@gmail.com> 10 10 * 11 11 */ 12 + #include <linux/cleanup.h> 13 + #include <linux/ftrace.h> 12 14 #include <linux/module.h> 13 15 #include <linux/mutex.h> 14 - #include <linux/ftrace.h> 15 16 16 17 #include "trace_dynevent.h" 17 18 #include "trace_probe.h" 18 - #include "trace_probe_tmpl.h" 19 19 #include "trace_probe_kernel.h" 20 + #include "trace_probe_tmpl.h" 20 21 21 22 #define EPROBE_EVENT_SYSTEM "eprobes" 22 23 ··· 344 343 val = *(unsigned int *)addr; 345 344 break; 346 345 default: 347 - if (field->is_signed) 348 - val = *(long *)addr; 349 - else 350 - val = *(unsigned long *)addr; 346 + if (field->size == sizeof(long)) { 347 + if (field->is_signed) 348 + val = *(long *)addr; 349 + else 350 + val = *(unsigned long *)addr; 351 + break; 352 + } 353 + /* This is an array, point to the addr itself */ 354 + val = (unsigned long)addr; 351 355 break; 352 356 } 353 357 return val; ··· 803 797 804 798 static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i) 805 799 { 806 - struct traceprobe_parse_context ctx = { 807 - .event = ep->event, 808 - .flags = TPARG_FL_KERNEL | TPARG_FL_TEVENT, 809 - }; 800 + struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) = NULL; 810 801 int ret; 811 802 812 - ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], &ctx); 803 + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 804 + if (!ctx) 805 + return -ENOMEM; 806 + ctx->event = ep->event; 807 + ctx->flags = TPARG_FL_KERNEL | TPARG_FL_TEVENT; 808 + 809 + ret = traceprobe_parse_probe_arg(&ep->tp, i, argv[i], ctx); 813 810 /* Handle symbols "@" */ 814 811 if (!ret) 815 812 ret = traceprobe_update_arg(&ep->tp.args[i]); 816 813 817 - traceprobe_finish_parse(&ctx); 818 814 return ret; 819 815 } 820 816 ··· 877 869 const char *event = NULL, *group = EPROBE_EVENT_SYSTEM; 878 870 const char *sys_event = NULL, *sys_name = NULL; 879 871 struct trace_event_call *event_call; 872 + 
char *buf1 __free(kfree) = NULL; 873 + char *buf2 __free(kfree) = NULL; 874 + char *gbuf __free(kfree) = NULL; 880 875 struct trace_eprobe *ep = NULL; 881 - char buf1[MAX_EVENT_NAME_LEN]; 882 - char buf2[MAX_EVENT_NAME_LEN]; 883 - char gbuf[MAX_EVENT_NAME_LEN]; 884 876 int ret = 0, filter_idx = 0; 885 877 int i, filter_cnt; 886 878 ··· 891 883 892 884 event = strchr(&argv[0][1], ':'); 893 885 if (event) { 886 + gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); 887 + if (!gbuf) 888 + goto mem_error; 894 889 event++; 895 890 ret = traceprobe_parse_event_name(&event, &group, gbuf, 896 891 event - argv[0]); ··· 903 892 904 893 trace_probe_log_set_index(1); 905 894 sys_event = argv[1]; 895 + 896 + buf2 = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); 897 + if (!buf2) 898 + goto mem_error; 899 + 906 900 ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, 0); 907 901 if (ret || !sys_event || !sys_name) { 908 902 trace_probe_log_err(0, NO_EVENT_INFO); ··· 915 899 } 916 900 917 901 if (!event) { 918 - strscpy(buf1, sys_event, MAX_EVENT_NAME_LEN); 902 + buf1 = kstrdup(sys_event, GFP_KERNEL); 903 + if (!buf1) 904 + goto mem_error; 919 905 event = buf1; 920 906 } 921 907 ··· 990 972 trace_probe_log_clear(); 991 973 return ret; 992 974 975 + mem_error: 976 + ret = -ENOMEM; 977 + goto error; 993 978 parse_error: 994 979 ret = -EINVAL; 995 980 error:
+412 -202
kernel/trace/trace_fprobe.c
··· 4 4 * Copyright (C) 2022 Google LLC. 5 5 */ 6 6 #define pr_fmt(fmt) "trace_fprobe: " fmt 7 - #include <asm/ptrace.h> 8 7 9 8 #include <linux/fprobe.h> 9 + #include <linux/list.h> 10 10 #include <linux/module.h> 11 + #include <linux/mutex.h> 11 12 #include <linux/rculist.h> 12 13 #include <linux/security.h> 13 14 #include <linux/tracepoint.h> 14 15 #include <linux/uaccess.h> 16 + 17 + #include <asm/ptrace.h> 15 18 16 19 #include "trace_dynevent.h" 17 20 #include "trace_probe.h" ··· 24 21 #define FPROBE_EVENT_SYSTEM "fprobes" 25 22 #define TRACEPOINT_EVENT_SYSTEM "tracepoints" 26 23 #define RETHOOK_MAXACTIVE_MAX 4096 27 - #define TRACEPOINT_STUB ERR_PTR(-ENOENT) 28 24 29 25 static int trace_fprobe_create(const char *raw_command); 30 26 static int trace_fprobe_show(struct seq_file *m, struct dyn_event *ev); ··· 40 38 .match = trace_fprobe_match, 41 39 }; 42 40 41 + /* List of tracepoint_user */ 42 + static LIST_HEAD(tracepoint_user_list); 43 + static DEFINE_MUTEX(tracepoint_user_mutex); 44 + 45 + /* While living tracepoint_user, @tpoint can be NULL and @refcount != 0. */ 46 + struct tracepoint_user { 47 + struct list_head list; 48 + const char *name; 49 + struct tracepoint *tpoint; 50 + unsigned int refcount; 51 + }; 52 + 53 + /* NOTE: you must lock tracepoint_user_mutex. 
*/ 54 + #define for_each_tracepoint_user(tuser) \ 55 + list_for_each_entry(tuser, &tracepoint_user_list, list) 56 + 57 + static int tracepoint_user_register(struct tracepoint_user *tuser) 58 + { 59 + struct tracepoint *tpoint = tuser->tpoint; 60 + 61 + if (!tpoint) 62 + return 0; 63 + 64 + return tracepoint_probe_register_prio_may_exist(tpoint, 65 + tpoint->probestub, NULL, 0); 66 + } 67 + 68 + static void tracepoint_user_unregister(struct tracepoint_user *tuser) 69 + { 70 + if (!tuser->tpoint) 71 + return; 72 + 73 + WARN_ON_ONCE(tracepoint_probe_unregister(tuser->tpoint, tuser->tpoint->probestub, NULL)); 74 + tuser->tpoint = NULL; 75 + } 76 + 77 + static unsigned long tracepoint_user_ip(struct tracepoint_user *tuser) 78 + { 79 + if (!tuser->tpoint) 80 + return 0UL; 81 + 82 + return (unsigned long)tuser->tpoint->probestub; 83 + } 84 + 85 + static void __tracepoint_user_free(struct tracepoint_user *tuser) 86 + { 87 + if (!tuser) 88 + return; 89 + kfree(tuser->name); 90 + kfree(tuser); 91 + } 92 + 93 + DEFINE_FREE(tuser_free, struct tracepoint_user *, __tracepoint_user_free(_T)) 94 + 95 + static struct tracepoint_user *__tracepoint_user_init(const char *name, struct tracepoint *tpoint) 96 + { 97 + struct tracepoint_user *tuser __free(tuser_free) = NULL; 98 + int ret; 99 + 100 + tuser = kzalloc(sizeof(*tuser), GFP_KERNEL); 101 + if (!tuser) 102 + return NULL; 103 + tuser->name = kstrdup(name, GFP_KERNEL); 104 + if (!tuser->name) 105 + return NULL; 106 + 107 + if (tpoint) { 108 + ret = tracepoint_user_register(tuser); 109 + if (ret) 110 + return ERR_PTR(ret); 111 + } 112 + 113 + tuser->tpoint = tpoint; 114 + tuser->refcount = 1; 115 + INIT_LIST_HEAD(&tuser->list); 116 + list_add(&tuser->list, &tracepoint_user_list); 117 + 118 + return_ptr(tuser); 119 + } 120 + 121 + static struct tracepoint *find_tracepoint(const char *tp_name, 122 + struct module **tp_mod); 123 + 124 + /* 125 + * Get tracepoint_user if exist, or allocate new one and register it. 
126 + * If tracepoint is on a module, get its refcounter too. 127 + * This returns errno or NULL (not loaded yet) or tracepoint_user. 128 + */ 129 + static struct tracepoint_user *tracepoint_user_find_get(const char *name, struct module **pmod) 130 + { 131 + struct module *mod __free(module_put) = NULL; 132 + struct tracepoint_user *tuser; 133 + struct tracepoint *tpoint; 134 + 135 + if (!name || !pmod) 136 + return ERR_PTR(-EINVAL); 137 + 138 + /* Get and lock the module which has tracepoint. */ 139 + tpoint = find_tracepoint(name, &mod); 140 + 141 + guard(mutex)(&tracepoint_user_mutex); 142 + /* Search existing tracepoint_user */ 143 + for_each_tracepoint_user(tuser) { 144 + if (!strcmp(tuser->name, name)) { 145 + tuser->refcount++; 146 + *pmod = no_free_ptr(mod); 147 + return tuser; 148 + } 149 + } 150 + 151 + /* The corresponding tracepoint_user is not found. */ 152 + tuser = __tracepoint_user_init(name, tpoint); 153 + if (!IS_ERR_OR_NULL(tuser)) 154 + *pmod = no_free_ptr(mod); 155 + 156 + return tuser; 157 + } 158 + 159 + static void tracepoint_user_put(struct tracepoint_user *tuser) 160 + { 161 + scoped_guard(mutex, &tracepoint_user_mutex) { 162 + if (--tuser->refcount > 0) 163 + return; 164 + 165 + list_del(&tuser->list); 166 + tracepoint_user_unregister(tuser); 167 + } 168 + 169 + __tracepoint_user_free(tuser); 170 + } 171 + 172 + DEFINE_FREE(tuser_put, struct tracepoint_user *, 173 + if (!IS_ERR_OR_NULL(_T)) 174 + tracepoint_user_put(_T)) 175 + 43 176 /* 44 177 * Fprobe event core functions 178 + */ 179 + 180 + /* 181 + * @tprobe is true for tracepoint probe. 182 + * @tuser can be NULL if the trace_fprobe is disabled or the tracepoint is not 183 + * loaded with a module. If @tuser != NULL, this trace_fprobe is enabled. 
45 184 */ 46 185 struct trace_fprobe { 47 186 struct dyn_event devent; 48 187 struct fprobe fp; 49 188 const char *symbol; 50 - struct tracepoint *tpoint; 51 - struct module *mod; 189 + bool tprobe; 190 + struct tracepoint_user *tuser; 52 191 struct trace_probe tp; 53 192 }; 54 193 ··· 219 76 220 77 static bool trace_fprobe_is_tracepoint(struct trace_fprobe *tf) 221 78 { 222 - return tf->tpoint != NULL; 79 + return tf->tprobe; 223 80 } 224 81 225 82 static const char *trace_fprobe_symbol(struct trace_fprobe *tf) ··· 554 411 { 555 412 if (tf) { 556 413 trace_probe_cleanup(&tf->tp); 414 + if (tf->tuser) 415 + tracepoint_user_put(tf->tuser); 557 416 kfree(tf->symbol); 558 417 kfree(tf); 559 418 } ··· 570 425 static struct trace_fprobe *alloc_trace_fprobe(const char *group, 571 426 const char *event, 572 427 const char *symbol, 573 - struct tracepoint *tpoint, 574 - struct module *mod, 575 - int nargs, bool is_return) 428 + int nargs, bool is_return, 429 + bool is_tracepoint) 576 430 { 577 431 struct trace_fprobe *tf __free(free_trace_fprobe) = NULL; 578 432 int ret = -ENOMEM; ··· 589 445 else 590 446 tf->fp.entry_handler = fentry_dispatcher; 591 447 592 - tf->tpoint = tpoint; 593 - tf->mod = mod; 448 + tf->tprobe = is_tracepoint; 594 449 595 450 ret = trace_probe_init(&tf->tp, event, group, false, nargs); 596 451 if (ret < 0) ··· 610 467 strcmp(trace_probe_group_name(&tf->tp), group) == 0) 611 468 return tf; 612 469 return NULL; 613 - } 614 - 615 - static inline int __enable_trace_fprobe(struct trace_fprobe *tf) 616 - { 617 - if (trace_fprobe_is_registered(tf)) 618 - enable_fprobe(&tf->fp); 619 - 620 - return 0; 621 - } 622 - 623 - static void __disable_trace_fprobe(struct trace_probe *tp) 624 - { 625 - struct trace_fprobe *tf; 626 - 627 - list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) { 628 - if (!trace_fprobe_is_registered(tf)) 629 - continue; 630 - disable_fprobe(&tf->fp); 631 - } 632 - } 633 - 634 - /* 635 - * Enable trace_probe 636 - * if the file 
is NULL, enable "perf" handler, or enable "trace" handler. 637 - */ 638 - static int enable_trace_fprobe(struct trace_event_call *call, 639 - struct trace_event_file *file) 640 - { 641 - struct trace_probe *tp; 642 - struct trace_fprobe *tf; 643 - bool enabled; 644 - int ret = 0; 645 - 646 - tp = trace_probe_primary_from_call(call); 647 - if (WARN_ON_ONCE(!tp)) 648 - return -ENODEV; 649 - enabled = trace_probe_is_enabled(tp); 650 - 651 - /* This also changes "enabled" state */ 652 - if (file) { 653 - ret = trace_probe_add_file(tp, file); 654 - if (ret) 655 - return ret; 656 - } else 657 - trace_probe_set_flag(tp, TP_FLAG_PROFILE); 658 - 659 - if (!enabled) { 660 - list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) { 661 - /* TODO: check the fprobe is gone */ 662 - __enable_trace_fprobe(tf); 663 - } 664 - } 665 - 666 - return 0; 667 - } 668 - 669 - /* 670 - * Disable trace_probe 671 - * if the file is NULL, disable "perf" handler, or disable "trace" handler. 672 - */ 673 - static int disable_trace_fprobe(struct trace_event_call *call, 674 - struct trace_event_file *file) 675 - { 676 - struct trace_probe *tp; 677 - 678 - tp = trace_probe_primary_from_call(call); 679 - if (WARN_ON_ONCE(!tp)) 680 - return -ENODEV; 681 - 682 - if (file) { 683 - if (!trace_probe_get_file_link(tp, file)) 684 - return -ENOENT; 685 - if (!trace_probe_has_single_file(tp)) 686 - goto out; 687 - trace_probe_clear_flag(tp, TP_FLAG_TRACE); 688 - } else 689 - trace_probe_clear_flag(tp, TP_FLAG_PROFILE); 690 - 691 - if (!trace_probe_is_enabled(tp)) 692 - __disable_trace_fprobe(tp); 693 - 694 - out: 695 - if (file) 696 - /* 697 - * Synchronization is done in below function. For perf event, 698 - * file == NULL and perf_trace_event_unreg() calls 699 - * tracepoint_synchronize_unregister() to ensure synchronize 700 - * event. We don't need to care about it. 
701 - */ 702 - trace_probe_remove_file(tp, file); 703 - 704 - return 0; 705 470 } 706 471 707 472 /* Event entry printers */ ··· 763 712 764 713 static int __regsiter_tracepoint_fprobe(struct trace_fprobe *tf) 765 714 { 766 - struct tracepoint *tpoint = tf->tpoint; 767 - unsigned long ip = (unsigned long)tpoint->probestub; 715 + struct tracepoint_user *tuser __free(tuser_put) = NULL; 716 + struct module *mod __free(module_put) = NULL; 717 + unsigned long ip; 768 718 int ret; 769 719 720 + if (WARN_ON_ONCE(tf->tuser)) 721 + return -EINVAL; 722 + 723 + /* If the tracepoint is in a module, it must be locked in this function. */ 724 + tuser = tracepoint_user_find_get(tf->symbol, &mod); 725 + /* This tracepoint is not loaded yet */ 726 + if (IS_ERR(tuser)) 727 + return PTR_ERR(tuser); 728 + if (!tuser) 729 + return -ENOMEM; 730 + 731 + /* Register fprobe only if the tracepoint is loaded. */ 732 + if (tuser->tpoint) { 733 + ip = tracepoint_user_ip(tuser); 734 + if (WARN_ON_ONCE(!ip)) 735 + return -ENOENT; 736 + 737 + ret = register_fprobe_ips(&tf->fp, &ip, 1); 738 + if (ret < 0) 739 + return ret; 740 + } 741 + 742 + tf->tuser = no_free_ptr(tuser); 743 + return 0; 744 + } 745 + 746 + /* Returns an error if the target function is not available, or 0 */ 747 + static int trace_fprobe_verify_target(struct trace_fprobe *tf) 748 + { 749 + int ret; 750 + 751 + /* Tracepoint should have a stub function. */ 752 + if (trace_fprobe_is_tracepoint(tf)) 753 + return 0; 754 + 770 755 /* 771 - * Here, we do 2 steps to enable fprobe on a tracepoint. 772 - * At first, put __probestub_##TP function on the tracepoint 773 - * and put a fprobe on the stub function. 756 + * Note: since we don't lock the module, even if this succeeded, 757 + * register_fprobe() later can fail. 
774 758 */ 775 - ret = tracepoint_probe_register_prio_may_exist(tpoint, 776 - tpoint->probestub, NULL, 0); 777 - if (ret < 0) 778 - return ret; 779 - return register_fprobe_ips(&tf->fp, &ip, 1); 759 + ret = fprobe_count_ips_from_filter(tf->symbol, NULL); 760 + return (ret < 0) ? ret : 0; 780 761 } 781 762 782 763 /* Internal register function - just handle fprobe and flags */ ··· 830 747 return ret; 831 748 } 832 749 833 - /* Set/clear disabled flag according to tp->flag */ 834 - if (trace_probe_is_enabled(&tf->tp)) 835 - tf->fp.flags &= ~FPROBE_FL_DISABLED; 836 - else 837 - tf->fp.flags |= FPROBE_FL_DISABLED; 750 + tf->fp.flags &= ~FPROBE_FL_DISABLED; 838 751 839 - if (trace_fprobe_is_tracepoint(tf)) { 840 - 841 - /* This tracepoint is not loaded yet */ 842 - if (tf->tpoint == TRACEPOINT_STUB) 843 - return 0; 844 - 752 + if (trace_fprobe_is_tracepoint(tf)) 845 753 return __regsiter_tracepoint_fprobe(tf); 846 - } 847 754 848 755 /* TODO: handle filter, nofilter or symbol list */ 849 756 return register_fprobe(&tf->fp, tf->symbol, NULL); ··· 842 769 /* Internal unregister function - just handle fprobe and flags */ 843 770 static void __unregister_trace_fprobe(struct trace_fprobe *tf) 844 771 { 845 - if (trace_fprobe_is_registered(tf)) { 772 + if (trace_fprobe_is_registered(tf)) 846 773 unregister_fprobe(&tf->fp); 847 - memset(&tf->fp, 0, sizeof(tf->fp)); 848 - if (trace_fprobe_is_tracepoint(tf)) { 849 - tracepoint_probe_unregister(tf->tpoint, 850 - tf->tpoint->probestub, NULL); 851 - tf->tpoint = NULL; 852 - tf->mod = NULL; 853 - } 774 + if (tf->tuser) { 775 + tracepoint_user_put(tf->tuser); 776 + tf->tuser = NULL; 854 777 } 855 778 } 856 779 ··· 906 837 return false; 907 838 } 908 839 909 - static int append_trace_fprobe(struct trace_fprobe *tf, struct trace_fprobe *to) 840 + static int append_trace_fprobe_event(struct trace_fprobe *tf, struct trace_fprobe *to) 910 841 { 911 842 int ret; 912 843 ··· 934 865 if (ret) 935 866 return ret; 936 867 937 - ret = 
__register_trace_fprobe(tf); 868 + ret = trace_fprobe_verify_target(tf); 938 869 if (ret) 939 870 trace_probe_unlink(&tf->tp); 940 871 else ··· 943 874 return ret; 944 875 } 945 876 946 - /* Register a trace_probe and probe_event */ 947 - static int register_trace_fprobe(struct trace_fprobe *tf) 877 + /* Register a trace_probe and probe_event, and check the fprobe is available. */ 878 + static int register_trace_fprobe_event(struct trace_fprobe *tf) 948 879 { 949 880 struct trace_fprobe *old_tf; 950 881 int ret; ··· 954 885 old_tf = find_trace_fprobe(trace_probe_name(&tf->tp), 955 886 trace_probe_group_name(&tf->tp)); 956 887 if (old_tf) 957 - return append_trace_fprobe(tf, old_tf); 888 + return append_trace_fprobe_event(tf, old_tf); 958 889 959 890 /* Register new event */ 960 891 ret = register_fprobe_event(tf); ··· 967 898 return ret; 968 899 } 969 900 970 - /* Register fprobe */ 971 - ret = __register_trace_fprobe(tf); 901 + /* Verify fprobe is sane. */ 902 + ret = trace_fprobe_verify_target(tf); 972 903 if (ret < 0) 973 904 unregister_fprobe_event(tf); 974 905 else ··· 1032 963 } 1033 964 1034 965 #ifdef CONFIG_MODULES 1035 - static void reenable_trace_fprobe(struct trace_fprobe *tf) 1036 - { 1037 - struct trace_probe *tp = &tf->tp; 1038 - 1039 - list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) { 1040 - __enable_trace_fprobe(tf); 1041 - } 1042 - } 1043 - 1044 966 /* 1045 967 * Find a tracepoint from specified module. In this case, this does not get the 1046 968 * module's refcount. The caller must ensure the module is not freed. ··· 1048 988 return data.tpoint; 1049 989 } 1050 990 991 + /* These are CONFIG_MODULES=y specific functions. 
*/ 992 + static bool tracepoint_user_within_module(struct tracepoint_user *tuser, 993 + struct module *mod) 994 + { 995 + return within_module(tracepoint_user_ip(tuser), mod); 996 + } 997 + 998 + static int tracepoint_user_register_again(struct tracepoint_user *tuser, 999 + struct tracepoint *tpoint) 1000 + { 1001 + tuser->tpoint = tpoint; 1002 + return tracepoint_user_register(tuser); 1003 + } 1004 + 1005 + static void tracepoint_user_unregister_clear(struct tracepoint_user *tuser) 1006 + { 1007 + tracepoint_user_unregister(tuser); 1008 + tuser->tpoint = NULL; 1009 + } 1010 + 1011 + /* module callback for tracepoint_user */ 1051 1012 static int __tracepoint_probe_module_cb(struct notifier_block *self, 1052 1013 unsigned long val, void *data) 1053 1014 { 1054 1015 struct tp_module *tp_mod = data; 1016 + struct tracepoint_user *tuser; 1055 1017 struct tracepoint *tpoint; 1018 + 1019 + if (val != MODULE_STATE_GOING && val != MODULE_STATE_COMING) 1020 + return NOTIFY_DONE; 1021 + 1022 + mutex_lock(&tracepoint_user_mutex); 1023 + for_each_tracepoint_user(tuser) { 1024 + if (val == MODULE_STATE_COMING) { 1025 + /* This is not a tracepoint in this module. Skip it. */ 1026 + tpoint = find_tracepoint_in_module(tp_mod->mod, tuser->name); 1027 + if (!tpoint) 1028 + continue; 1029 + WARN_ON_ONCE(tracepoint_user_register_again(tuser, tpoint)); 1030 + } else if (val == MODULE_STATE_GOING && 1031 + tracepoint_user_within_module(tuser, tp_mod->mod)) { 1032 + /* Unregister all tracepoint_user in this module. 
*/ 1033 + tracepoint_user_unregister_clear(tuser); 1034 + } 1035 + } 1036 + mutex_unlock(&tracepoint_user_mutex); 1037 + 1038 + return NOTIFY_DONE; 1039 + } 1040 + 1041 + static struct notifier_block tracepoint_module_nb = { 1042 + .notifier_call = __tracepoint_probe_module_cb, 1043 + }; 1044 + 1045 + /* module callback for tprobe events */ 1046 + static int __tprobe_event_module_cb(struct notifier_block *self, 1047 + unsigned long val, void *data) 1048 + { 1056 1049 struct trace_fprobe *tf; 1057 1050 struct dyn_event *pos; 1051 + struct module *mod = data; 1058 1052 1059 1053 if (val != MODULE_STATE_GOING && val != MODULE_STATE_COMING) 1060 1054 return NOTIFY_DONE; 1061 1055 1062 1056 mutex_lock(&event_mutex); 1063 1057 for_each_trace_fprobe(tf, pos) { 1064 - if (val == MODULE_STATE_COMING && tf->tpoint == TRACEPOINT_STUB) { 1065 - tpoint = find_tracepoint_in_module(tp_mod->mod, tf->symbol); 1066 - if (tpoint) { 1067 - tf->tpoint = tpoint; 1068 - tf->mod = tp_mod->mod; 1069 - if (!WARN_ON_ONCE(__regsiter_tracepoint_fprobe(tf)) && 1070 - trace_probe_is_enabled(&tf->tp)) 1071 - reenable_trace_fprobe(tf); 1072 - } 1073 - } else if (val == MODULE_STATE_GOING && tp_mod->mod == tf->mod) { 1058 + /* Skip fprobe and disabled tprobe events. */ 1059 + if (!trace_fprobe_is_tracepoint(tf) || !tf->tuser) 1060 + continue; 1061 + 1062 + /* Before this notification, tracepoint notifier has already done. */ 1063 + if (val == MODULE_STATE_COMING && 1064 + tracepoint_user_within_module(tf->tuser, mod)) { 1065 + unsigned long ip = tracepoint_user_ip(tf->tuser); 1066 + 1067 + WARN_ON_ONCE(register_fprobe_ips(&tf->fp, &ip, 1)); 1068 + } else if (val == MODULE_STATE_GOING && 1069 + /* 1070 + * tracepoint_user_within_module() does not work here because 1071 + * tracepoint_user is already unregistered and cleared tpoint. 1072 + * Instead, checking whether the fprobe is registered but 1073 + * tpoint is cleared(unregistered). Such unbalance probes 1074 + * must be adjusted anyway. 
1075 + */ 1076 + trace_fprobe_is_registered(tf) && 1077 + !tf->tuser->tpoint) { 1074 1078 unregister_fprobe(&tf->fp); 1075 - if (trace_fprobe_is_tracepoint(tf)) { 1076 - tracepoint_probe_unregister(tf->tpoint, 1077 - tf->tpoint->probestub, NULL); 1078 - tf->tpoint = TRACEPOINT_STUB; 1079 - tf->mod = NULL; 1080 - } 1081 1079 } 1082 1080 } 1083 1081 mutex_unlock(&event_mutex); ··· 1143 1025 return NOTIFY_DONE; 1144 1026 } 1145 1027 1146 - static struct notifier_block tracepoint_module_nb = { 1147 - .notifier_call = __tracepoint_probe_module_cb, 1028 + /* NOTE: this must be called after tracepoint callback */ 1029 + static struct notifier_block tprobe_event_module_nb = { 1030 + .notifier_call = __tprobe_event_module_cb, 1031 + /* Make sure this is later than tracepoint module notifier. */ 1032 + .priority = -10, 1148 1033 }; 1149 1034 #endif /* CONFIG_MODULES */ 1150 1035 ··· 1207 1086 return 0; 1208 1087 } 1209 1088 1210 - DEFINE_FREE(module_put, struct module *, if (_T) module_put(_T)) 1211 - 1212 1089 static int trace_fprobe_create_internal(int argc, const char *argv[], 1213 1090 struct traceprobe_parse_context *ctx) 1214 1091 { ··· 1235 1116 * FETCHARG:TYPE : use TYPE instead of unsigned long. 
1236 1117 */ 1237 1118 struct trace_fprobe *tf __free(free_trace_fprobe) = NULL; 1238 - int i, new_argc = 0, ret = 0; 1239 - bool is_return = false; 1240 - char *symbol __free(kfree) = NULL; 1241 1119 const char *event = NULL, *group = FPROBE_EVENT_SYSTEM; 1120 + struct module *mod __free(module_put) = NULL; 1242 1121 const char **new_argv __free(kfree) = NULL; 1243 - char buf[MAX_EVENT_NAME_LEN]; 1244 - char gbuf[MAX_EVENT_NAME_LEN]; 1245 - char sbuf[KSYM_NAME_LEN]; 1246 - char abuf[MAX_BTF_ARGS_LEN]; 1122 + char *symbol __free(kfree) = NULL; 1123 + char *ebuf __free(kfree) = NULL; 1124 + char *gbuf __free(kfree) = NULL; 1125 + char *sbuf __free(kfree) = NULL; 1126 + char *abuf __free(kfree) = NULL; 1247 1127 char *dbuf __free(kfree) = NULL; 1128 + int i, new_argc = 0, ret = 0; 1248 1129 bool is_tracepoint = false; 1249 - struct module *tp_mod __free(module_put) = NULL; 1250 - struct tracepoint *tpoint = NULL; 1130 + bool is_return = false; 1251 1131 1252 1132 if ((argv[0][0] != 'f' && argv[0][0] != 't') || argc < 2) 1253 1133 return -ECANCELED; ··· 1274 1156 1275 1157 trace_probe_log_set_index(0); 1276 1158 if (event) { 1159 + gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); 1160 + if (!gbuf) 1161 + return -ENOMEM; 1277 1162 ret = traceprobe_parse_event_name(&event, &group, gbuf, 1278 1163 event - argv[0]); 1279 1164 if (ret) ··· 1284 1163 } 1285 1164 1286 1165 if (!event) { 1166 + ebuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); 1167 + if (!ebuf) 1168 + return -ENOMEM; 1287 1169 /* Make a new event name */ 1288 1170 if (is_tracepoint) 1289 - snprintf(buf, MAX_EVENT_NAME_LEN, "%s%s", 1171 + snprintf(ebuf, MAX_EVENT_NAME_LEN, "%s%s", 1290 1172 isdigit(*symbol) ? "_" : "", symbol); 1291 1173 else 1292 - snprintf(buf, MAX_EVENT_NAME_LEN, "%s__%s", symbol, 1174 + snprintf(ebuf, MAX_EVENT_NAME_LEN, "%s__%s", symbol, 1293 1175 is_return ? 
"exit" : "entry"); 1294 - sanitize_event_name(buf); 1295 - event = buf; 1176 + sanitize_event_name(ebuf); 1177 + event = ebuf; 1296 1178 } 1297 1179 1298 1180 if (is_return) ··· 1303 1179 else 1304 1180 ctx->flags |= TPARG_FL_FENTRY; 1305 1181 1182 + ctx->funcname = NULL; 1306 1183 if (is_tracepoint) { 1184 + /* Get tracepoint and lock its module until the end of the registration. */ 1185 + struct tracepoint *tpoint; 1186 + 1307 1187 ctx->flags |= TPARG_FL_TPOINT; 1308 - tpoint = find_tracepoint(symbol, &tp_mod); 1188 + mod = NULL; 1189 + tpoint = find_tracepoint(symbol, &mod); 1309 1190 if (tpoint) { 1310 - ctx->funcname = kallsyms_lookup( 1311 - (unsigned long)tpoint->probestub, 1312 - NULL, NULL, NULL, sbuf); 1313 - } else if (IS_ENABLED(CONFIG_MODULES)) { 1314 - /* This *may* be loaded afterwards */ 1315 - tpoint = TRACEPOINT_STUB; 1316 - ctx->funcname = symbol; 1317 - } else { 1318 - trace_probe_log_set_index(1); 1319 - trace_probe_log_err(0, NO_TRACEPOINT); 1320 - return -EINVAL; 1191 + sbuf = kmalloc(KSYM_NAME_LEN, GFP_KERNEL); 1192 + if (!sbuf) 1193 + return -ENOMEM; 1194 + ctx->funcname = kallsyms_lookup((unsigned long)tpoint->probestub, 1195 + NULL, NULL, NULL, sbuf); 1321 1196 } 1322 - } else 1197 + } 1198 + if (!ctx->funcname) 1323 1199 ctx->funcname = symbol; 1324 1200 1201 + abuf = kmalloc(MAX_BTF_ARGS_LEN, GFP_KERNEL); 1202 + if (!abuf) 1203 + return -ENOMEM; 1325 1204 argc -= 2; argv += 2; 1326 1205 new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc, 1327 1206 abuf, MAX_BTF_ARGS_LEN, ctx); ··· 1345 1218 return ret; 1346 1219 1347 1220 /* setup a probe */ 1348 - tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod, 1349 - argc, is_return); 1221 + tf = alloc_trace_fprobe(group, event, symbol, argc, is_return, is_tracepoint); 1350 1222 if (IS_ERR(tf)) { 1351 1223 ret = PTR_ERR(tf); 1352 1224 /* This must return -ENOMEM, else there is a bug */ ··· 1377 1251 if (ret < 0) 1378 1252 return ret; 1379 1253 1380 - ret = 
register_trace_fprobe(tf); 1254 + ret = register_trace_fprobe_event(tf); 1381 1255 if (ret) { 1382 1256 trace_probe_log_set_index(1); 1383 1257 if (ret == -EILSEQ) ··· 1397 1271 1398 1272 static int trace_fprobe_create_cb(int argc, const char *argv[]) 1399 1273 { 1400 - struct traceprobe_parse_context ctx = { 1401 - .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE, 1402 - }; 1274 + struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) = NULL; 1403 1275 int ret; 1404 1276 1277 + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 1278 + if (!ctx) 1279 + return -ENOMEM; 1280 + 1281 + ctx->flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE; 1282 + 1405 1283 trace_probe_log_init("trace_fprobe", argc, argv); 1406 - ret = trace_fprobe_create_internal(argc, argv, &ctx); 1407 - traceprobe_finish_parse(&ctx); 1284 + ret = trace_fprobe_create_internal(argc, argv, ctx); 1408 1285 trace_probe_log_clear(); 1409 1286 return ret; 1410 1287 } ··· 1445 1316 for (i = 0; i < tf->tp.nr_args; i++) 1446 1317 seq_printf(m, " %s=%s", tf->tp.args[i].name, tf->tp.args[i].comm); 1447 1318 seq_putc(m, '\n'); 1319 + 1320 + return 0; 1321 + } 1322 + 1323 + /* 1324 + * Enable trace_probe 1325 + * if the file is NULL, enable "perf" handler, or enable "trace" handler. 
1326 + */ 1327 + static int enable_trace_fprobe(struct trace_event_call *call, 1328 + struct trace_event_file *file) 1329 + { 1330 + struct trace_probe *tp; 1331 + struct trace_fprobe *tf; 1332 + bool enabled; 1333 + int ret = 0; 1334 + 1335 + tp = trace_probe_primary_from_call(call); 1336 + if (WARN_ON_ONCE(!tp)) 1337 + return -ENODEV; 1338 + enabled = trace_probe_is_enabled(tp); 1339 + 1340 + /* This also changes "enabled" state */ 1341 + if (file) { 1342 + ret = trace_probe_add_file(tp, file); 1343 + if (ret) 1344 + return ret; 1345 + } else 1346 + trace_probe_set_flag(tp, TP_FLAG_PROFILE); 1347 + 1348 + if (!enabled) { 1349 + list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) { 1350 + ret = __register_trace_fprobe(tf); 1351 + if (ret < 0) 1352 + return ret; 1353 + } 1354 + } 1355 + 1356 + return 0; 1357 + } 1358 + 1359 + /* 1360 + * Disable trace_probe 1361 + * if the file is NULL, disable "perf" handler, or disable "trace" handler. 1362 + */ 1363 + static int disable_trace_fprobe(struct trace_event_call *call, 1364 + struct trace_event_file *file) 1365 + { 1366 + struct trace_fprobe *tf; 1367 + struct trace_probe *tp; 1368 + 1369 + tp = trace_probe_primary_from_call(call); 1370 + if (WARN_ON_ONCE(!tp)) 1371 + return -ENODEV; 1372 + 1373 + if (file) { 1374 + if (!trace_probe_get_file_link(tp, file)) 1375 + return -ENOENT; 1376 + if (!trace_probe_has_single_file(tp)) 1377 + goto out; 1378 + trace_probe_clear_flag(tp, TP_FLAG_TRACE); 1379 + } else 1380 + trace_probe_clear_flag(tp, TP_FLAG_PROFILE); 1381 + 1382 + if (!trace_probe_is_enabled(tp)) { 1383 + list_for_each_entry(tf, trace_probe_probe_list(tp), tp.list) { 1384 + unregister_fprobe(&tf->fp); 1385 + } 1386 + } 1387 + 1388 + out: 1389 + if (file) 1390 + /* 1391 + * Synchronization is done in below function. For perf event, 1392 + * file == NULL and perf_trace_event_unreg() calls 1393 + * tracepoint_synchronize_unregister() to ensure synchronize 1394 + * event. We don't need to care about it. 
1395 + */ 1396 + trace_probe_remove_file(tp, file); 1448 1397 1449 1398 return 0; 1450 1399 } ··· 1570 1363 1571 1364 #ifdef CONFIG_MODULES 1572 1365 ret = register_tracepoint_module_notifier(&tracepoint_module_nb); 1366 + if (ret) 1367 + return ret; 1368 + ret = register_module_notifier(&tprobe_event_module_nb); 1573 1369 if (ret) 1574 1370 return ret; 1575 1371 #endif
+40 -25
kernel/trace/trace_kprobe.c
··· 9 9 10 10 #include <linux/bpf-cgroup.h> 11 11 #include <linux/cleanup.h> 12 - #include <linux/security.h> 13 - #include <linux/module.h> 14 - #include <linux/uaccess.h> 15 - #include <linux/rculist.h> 16 12 #include <linux/error-injection.h> 13 + #include <linux/module.h> 14 + #include <linux/rculist.h> 15 + #include <linux/security.h> 16 + #include <linux/uaccess.h> 17 17 18 18 #include <asm/setup.h> /* for COMMAND_LINE_SIZE */ 19 19 20 20 #include "trace_dynevent.h" 21 21 #include "trace_kprobe_selftest.h" 22 22 #include "trace_probe.h" 23 - #include "trace_probe_tmpl.h" 24 23 #include "trace_probe_kernel.h" 24 + #include "trace_probe_tmpl.h" 25 25 26 26 #define KPROBE_EVENT_SYSTEM "kprobes" 27 27 #define KRETPROBE_MAXACTIVE_MAX 4096 ··· 861 861 * FETCHARG:TYPE : use TYPE instead of unsigned long. 862 862 */ 863 863 struct trace_kprobe *tk __free(free_trace_kprobe) = NULL; 864 - int i, len, new_argc = 0, ret = 0; 865 - bool is_return = false; 866 - char *symbol __free(kfree) = NULL; 867 - char *tmp = NULL; 868 - const char **new_argv __free(kfree) = NULL; 869 864 const char *event = NULL, *group = KPROBE_EVENT_SYSTEM; 870 - enum probe_print_type ptype; 871 - int maxactive = 0; 872 - long offset = 0; 873 - void *addr = NULL; 874 - char buf[MAX_EVENT_NAME_LEN]; 875 - char gbuf[MAX_EVENT_NAME_LEN]; 876 - char abuf[MAX_BTF_ARGS_LEN]; 865 + const char **new_argv __free(kfree) = NULL; 866 + int i, len, new_argc = 0, ret = 0; 867 + char *symbol __free(kfree) = NULL; 868 + char *ebuf __free(kfree) = NULL; 869 + char *gbuf __free(kfree) = NULL; 870 + char *abuf __free(kfree) = NULL; 877 871 char *dbuf __free(kfree) = NULL; 872 + enum probe_print_type ptype; 873 + bool is_return = false; 874 + int maxactive = 0; 875 + void *addr = NULL; 876 + char *tmp = NULL; 877 + long offset = 0; 878 878 879 879 switch (argv[0][0]) { 880 880 case 'r': ··· 893 893 event++; 894 894 895 895 if (isdigit(argv[0][1])) { 896 + char *buf __free(kfree) = NULL; 897 + 896 898 if (!is_return) { 
897 899 trace_probe_log_err(1, BAD_MAXACT_TYPE); 898 900 return -EINVAL; ··· 907 905 trace_probe_log_err(1, BAD_MAXACT); 908 906 return -EINVAL; 909 907 } 910 - memcpy(buf, &argv[0][1], len); 908 + buf = kmemdup(&argv[0][1], len + 1, GFP_KERNEL); 911 909 buf[len] = '\0'; 912 910 ret = kstrtouint(buf, 0, &maxactive); 913 911 if (ret || !maxactive) { ··· 975 973 976 974 trace_probe_log_set_index(0); 977 975 if (event) { 976 + gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); 977 + if (!gbuf) 978 + return -ENOMEM; 978 979 ret = traceprobe_parse_event_name(&event, &group, gbuf, 979 980 event - argv[0]); 980 981 if (ret) ··· 986 981 987 982 if (!event) { 988 983 /* Make a new event name */ 984 + ebuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); 985 + if (!ebuf) 986 + return -ENOMEM; 989 987 if (symbol) 990 - snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld", 988 + snprintf(ebuf, MAX_EVENT_NAME_LEN, "%c_%s_%ld", 991 989 is_return ? 'r' : 'p', symbol, offset); 992 990 else 993 - snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p", 991 + snprintf(ebuf, MAX_EVENT_NAME_LEN, "%c_0x%p", 994 992 is_return ? 
'r' : 'p', addr); 995 - sanitize_event_name(buf); 996 - event = buf; 993 + sanitize_event_name(ebuf); 994 + event = ebuf; 997 995 } 998 996 997 + abuf = kmalloc(MAX_BTF_ARGS_LEN, GFP_KERNEL); 998 + if (!abuf) 999 + return -ENOMEM; 999 1000 argc -= 2; argv += 2; 1000 1001 ctx->funcname = symbol; 1001 1002 new_argv = traceprobe_expand_meta_args(argc, argv, &new_argc, ··· 1076 1065 1077 1066 static int trace_kprobe_create_cb(int argc, const char *argv[]) 1078 1067 { 1079 - struct traceprobe_parse_context ctx = { .flags = TPARG_FL_KERNEL }; 1068 + struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) = NULL; 1080 1069 int ret; 1070 + 1071 + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 1072 + if (!ctx) 1073 + return -ENOMEM; 1074 + ctx->flags = TPARG_FL_KERNEL; 1081 1075 1082 1076 trace_probe_log_init("trace_kprobe", argc, argv); 1083 1077 1084 - ret = trace_kprobe_create_internal(argc, argv, &ctx); 1078 + ret = trace_kprobe_create_internal(argc, argv, ctx); 1085 1079 1086 - traceprobe_finish_parse(&ctx); 1087 1080 trace_probe_log_clear(); 1088 1081 return ret; 1089 1082 }
+89 -61
kernel/trace/trace_probe.c
··· 13 13 14 14 #include <linux/bpf.h> 15 15 #include <linux/fs.h> 16 - #include "trace_btf.h" 17 16 17 + #include "trace_btf.h" 18 18 #include "trace_probe.h" 19 19 20 20 #undef C ··· 247 247 return 0; 248 248 } 249 249 250 - /* @buf must has MAX_EVENT_NAME_LEN size */ 250 + /** 251 + * traceprobe_parse_event_name() - Parse a string into group and event names 252 + * @pevent: A pointer to the string to be parsed. 253 + * @pgroup: A pointer to the group name. 254 + * @buf: A buffer to store the parsed group name. 255 + * @offset: The offset of the string in the original user command, for logging. 256 + * 257 + * This parses a string with the format `[GROUP/][EVENT]` or `[GROUP.][EVENT]` 258 + * (either GROUP or EVENT or both must be specified). 259 + * Since the parsed group name is stored in @buf, the caller must ensure @buf 260 + * is at least MAX_EVENT_NAME_LEN bytes. 261 + * 262 + * Return: 0 on success, or -EINVAL on failure. 263 + * 264 + * If success, *@pevent is updated to point to the event name part of the 265 + * original string, or NULL if there is no event name. 266 + * Also, *@pgroup is updated to point to the parsed group which is stored 267 + * in @buf, or NULL if there is no group name. 268 + */ 251 269 int traceprobe_parse_event_name(const char **pevent, const char **pgroup, 252 270 char *buf, int offset) 253 271 { ··· 797 779 798 780 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API 799 781 782 + static void store_entry_arg_at(struct fetch_insn *code, int argnum, int offset) 783 + { 784 + code[0].op = FETCH_OP_ARG; 785 + code[0].param = argnum; 786 + code[1].op = FETCH_OP_ST_EDATA; 787 + code[1].offset = offset; 788 + } 789 + 790 + static int get_entry_arg_max_offset(struct probe_entry_arg *earg) 791 + { 792 + int i, max_offset = 0; 793 + 794 + /* 795 + * earg->code[] array has an operation sequence which is run in 796 + * the entry handler. 
797 + * The sequence stopped by FETCH_OP_END and each data stored in 798 + * the entry data buffer by FETCH_OP_ST_EDATA. The FETCH_OP_ST_EDATA 799 + * stores the data at the data buffer + its offset, and all data are 800 + * "unsigned long" size. The offset must be increased when a data is 801 + * stored. Thus we need to find the last FETCH_OP_ST_EDATA in the 802 + * code array. 803 + */ 804 + for (i = 0; i < earg->size - 1 && earg->code[i].op != FETCH_OP_END; i++) { 805 + if (earg->code[i].op == FETCH_OP_ST_EDATA) 806 + if (earg->code[i].offset > max_offset) 807 + max_offset = earg->code[i].offset; 808 + } 809 + return max_offset; 810 + } 811 + 800 812 /* 801 813 * Add the entry code to store the 'argnum'th parameter and return the offset 802 814 * in the entry data buffer where the data will be stored. ··· 834 786 static int __store_entry_arg(struct trace_probe *tp, int argnum) 835 787 { 836 788 struct probe_entry_arg *earg = tp->entry_arg; 837 - bool match = false; 838 - int i, offset; 789 + int i, offset, last_offset = 0; 839 790 840 791 if (!earg) { 841 792 earg = kzalloc(sizeof(*tp->entry_arg), GFP_KERNEL); ··· 851 804 for (i = 0; i < earg->size; i++) 852 805 earg->code[i].op = FETCH_OP_END; 853 806 tp->entry_arg = earg; 807 + store_entry_arg_at(earg->code, argnum, 0); 808 + return 0; 854 809 } 855 810 856 811 /* 857 - * The entry code array is repeating the pair of 858 - * [FETCH_OP_ARG(argnum)][FETCH_OP_ST_EDATA(offset of entry data buffer)] 859 - * and the rest of entries are filled with [FETCH_OP_END]. 812 + * NOTE: if anyone change the following rule, please rewrite this. 813 + * The entry code array is filled with the pair of 860 814 * 861 - * To reduce the redundant function parameter fetching, we scan the entry 862 - * code array to find the FETCH_OP_ARG which already fetches the 'argnum' 863 - * parameter. If it doesn't match, update 'offset' to find the last 864 - * offset. 
865 - * If we find the FETCH_OP_END without matching FETCH_OP_ARG entry, we 866 - * will save the entry with FETCH_OP_ARG and FETCH_OP_ST_EDATA, and 867 - * return data offset so that caller can find the data offset in the entry 868 - * data buffer. 815 + * [FETCH_OP_ARG(argnum)] 816 + * [FETCH_OP_ST_EDATA(offset of entry data buffer)] 817 + * 818 + * and the rest of entries are filled with [FETCH_OP_END]. 819 + * The offset should be incremented, thus the last pair should 820 + * have the largest offset. 869 821 */ 870 - offset = 0; 871 - for (i = 0; i < earg->size - 1; i++) { 872 - switch (earg->code[i].op) { 873 - case FETCH_OP_END: 874 - earg->code[i].op = FETCH_OP_ARG; 875 - earg->code[i].param = argnum; 876 - earg->code[i + 1].op = FETCH_OP_ST_EDATA; 877 - earg->code[i + 1].offset = offset; 878 - return offset; 879 - case FETCH_OP_ARG: 880 - match = (earg->code[i].param == argnum); 881 - break; 882 - case FETCH_OP_ST_EDATA: 883 - offset = earg->code[i].offset; 884 - if (match) 885 - return offset; 886 - offset += sizeof(unsigned long); 887 - break; 888 - default: 889 - break; 890 - } 822 + 823 + /* Search the offset for the sprcified argnum. */ 824 + for (i = 0; i < earg->size - 1 && earg->code[i].op != FETCH_OP_END; i += 2) { 825 + if (WARN_ON_ONCE(earg->code[i].op != FETCH_OP_ARG)) 826 + return -EINVAL; 827 + 828 + if (earg->code[i].param != argnum) 829 + continue; 830 + 831 + if (WARN_ON_ONCE(earg->code[i + 1].op != FETCH_OP_ST_EDATA)) 832 + return -EINVAL; 833 + 834 + return earg->code[i + 1].offset; 891 835 } 892 - return -ENOSPC; 836 + /* Not found, append new entry if possible. */ 837 + if (i >= earg->size - 1) 838 + return -ENOSPC; 839 + 840 + /* The last entry must have the largest offset. 
*/ 841 + if (i != 0) { 842 + if (WARN_ON_ONCE(earg->code[i - 1].op != FETCH_OP_ST_EDATA)) 843 + return -EINVAL; 844 + last_offset = earg->code[i - 1].offset; 845 + } 846 + 847 + offset = last_offset + sizeof(unsigned long); 848 + store_entry_arg_at(&earg->code[i], argnum, offset); 849 + return offset; 893 850 } 894 851 895 852 int traceprobe_get_entry_data_size(struct trace_probe *tp) 896 853 { 897 854 struct probe_entry_arg *earg = tp->entry_arg; 898 - int i, size = 0; 899 855 900 856 if (!earg) 901 857 return 0; 902 858 903 - /* 904 - * earg->code[] array has an operation sequence which is run in 905 - * the entry handler. 906 - * The sequence stopped by FETCH_OP_END and each data stored in 907 - * the entry data buffer by FETCH_OP_ST_EDATA. The FETCH_OP_ST_EDATA 908 - * stores the data at the data buffer + its offset, and all data are 909 - * "unsigned long" size. The offset must be increased when a data is 910 - * stored. Thus we need to find the last FETCH_OP_ST_EDATA in the 911 - * code array. 912 - */ 913 - for (i = 0; i < earg->size; i++) { 914 - switch (earg->code[i].op) { 915 - case FETCH_OP_END: 916 - goto out; 917 - case FETCH_OP_ST_EDATA: 918 - size = earg->code[i].offset + sizeof(unsigned long); 919 - break; 920 - default: 921 - break; 922 - } 923 - } 924 - out: 925 - return size; 859 + return get_entry_arg_max_offset(earg) + sizeof(unsigned long); 926 860 } 927 861 928 862 void store_trace_entry_data(void *edata, struct trace_probe *tp, struct pt_regs *regs)
+18 -8
kernel/trace/trace_probe.h
··· 10 10 * Author: Srikar Dronamraju 11 11 */ 12 12 13 + #include <linux/bitops.h> 14 + #include <linux/btf.h> 15 + #include <linux/cleanup.h> 16 + #include <linux/kprobes.h> 17 + #include <linux/limits.h> 18 + #include <linux/perf_event.h> 19 + #include <linux/ptrace.h> 13 20 #include <linux/seq_file.h> 14 21 #include <linux/slab.h> 15 22 #include <linux/smp.h> 23 + #include <linux/string.h> 24 + #include <linux/stringify.h> 16 25 #include <linux/tracefs.h> 17 26 #include <linux/types.h> 18 - #include <linux/string.h> 19 - #include <linux/ptrace.h> 20 - #include <linux/perf_event.h> 21 - #include <linux/kprobes.h> 22 - #include <linux/stringify.h> 23 - #include <linux/limits.h> 24 27 #include <linux/uaccess.h> 25 - #include <linux/bitops.h> 26 - #include <linux/btf.h> 28 + 27 29 #include <asm/bitsperlong.h> 28 30 29 31 #include "trace.h" ··· 440 438 * this MUST be called for clean up the context and return a resource. 441 439 */ 442 440 void traceprobe_finish_parse(struct traceprobe_parse_context *ctx); 441 + static inline void traceprobe_free_parse_ctx(struct traceprobe_parse_context *ctx) 442 + { 443 + traceprobe_finish_parse(ctx); 444 + kfree(ctx); 445 + } 446 + 447 + DEFINE_FREE(traceprobe_parse_context, struct traceprobe_parse_context *, 448 + if (_T) traceprobe_free_parse_ctx(_T)) 443 449 444 450 extern int traceprobe_split_symbol_offset(char *symbol, long *offset); 445 451 int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
+33 -20
kernel/trace/trace_uprobe.c
··· 8 8 #define pr_fmt(fmt) "trace_uprobe: " fmt 9 9 10 10 #include <linux/bpf-cgroup.h> 11 - #include <linux/security.h> 11 + #include <linux/cleanup.h> 12 12 #include <linux/ctype.h> 13 + #include <linux/filter.h> 13 14 #include <linux/module.h> 15 + #include <linux/namei.h> 16 + #include <linux/percpu.h> 17 + #include <linux/rculist.h> 18 + #include <linux/security.h> 19 + #include <linux/string.h> 14 20 #include <linux/uaccess.h> 15 21 #include <linux/uprobes.h> 16 - #include <linux/namei.h> 17 - #include <linux/string.h> 18 - #include <linux/rculist.h> 19 - #include <linux/filter.h> 20 - #include <linux/percpu.h> 21 22 23 + #include "trace.h" 22 24 #include "trace_dynevent.h" 23 25 #include "trace_probe.h" 24 26 #include "trace_probe_tmpl.h" ··· 539 537 */ 540 538 static int __trace_uprobe_create(int argc, const char **argv) 541 539 { 542 - struct trace_uprobe *tu; 543 540 const char *event = NULL, *group = UPROBE_EVENT_SYSTEM; 544 541 char *arg, *filename, *rctr, *rctr_end, *tmp; 545 - char buf[MAX_EVENT_NAME_LEN]; 546 - char gbuf[MAX_EVENT_NAME_LEN]; 547 - enum probe_print_type ptype; 548 - struct path path; 549 542 unsigned long offset, ref_ctr_offset; 543 + char *gbuf __free(kfree) = NULL; 544 + char *buf __free(kfree) = NULL; 545 + enum probe_print_type ptype; 546 + struct trace_uprobe *tu; 550 547 bool is_return = false; 548 + struct path path; 551 549 int i, ret; 552 550 553 551 ref_ctr_offset = 0; ··· 655 653 /* setup a probe */ 656 654 trace_probe_log_set_index(0); 657 655 if (event) { 656 + gbuf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); 657 + if (!gbuf) 658 + goto fail_mem; 659 + 658 660 ret = traceprobe_parse_event_name(&event, &group, gbuf, 659 661 event - argv[0]); 660 662 if (ret) ··· 670 664 char *ptr; 671 665 672 666 tail = kstrdup(kbasename(filename), GFP_KERNEL); 673 - if (!tail) { 674 - ret = -ENOMEM; 675 - goto fail_address_parse; 676 - } 667 + if (!tail) 668 + goto fail_mem; 677 669 678 670 ptr = strpbrk(tail, ".-_"); 679 671 if (ptr) 680 
672 *ptr = '\0'; 681 673 674 + buf = kmalloc(MAX_EVENT_NAME_LEN, GFP_KERNEL); 675 + if (!buf) 676 + goto fail_mem; 682 677 snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset); 683 678 event = buf; 684 679 kfree(tail); ··· 702 695 703 696 /* parse arguments */ 704 697 for (i = 0; i < argc; i++) { 705 - struct traceprobe_parse_context ctx = { 706 - .flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER, 707 - }; 698 + struct traceprobe_parse_context *ctx __free(traceprobe_parse_context) 699 + = kzalloc(sizeof(*ctx), GFP_KERNEL); 708 700 701 + if (!ctx) { 702 + ret = -ENOMEM; 703 + goto error; 704 + } 705 + ctx->flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER; 709 706 trace_probe_log_set_index(i + 2); 710 - ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i], &ctx); 711 - traceprobe_finish_parse(&ctx); 707 + ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i], ctx); 712 708 if (ret) 713 709 goto error; 714 710 } ··· 730 720 out: 731 721 trace_probe_log_clear(); 732 722 return ret; 723 + 724 + fail_mem: 725 + ret = -ENOMEM; 733 726 734 727 fail_address_parse: 735 728 trace_probe_log_clear();
+27 -19
tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
··· 16 16 17 17 echo "f:myevent1 $PLACE" >> dynamic_events 18 18 19 - # Make sure the event is attached and is the only one 20 - grep -q $PLACE enabled_functions 21 - cnt=`cat enabled_functions | wc -l` 22 - if [ $cnt -ne $((ocnt + 1)) ]; then 23 - exit_fail 24 - fi 25 - 26 19 echo "f:myevent2 $PLACE%return" >> dynamic_events 27 - 28 - # It should till be the only attached function 29 - cnt=`cat enabled_functions | wc -l` 30 - if [ $cnt -ne $((ocnt + 1)) ]; then 31 - exit_fail 32 - fi 33 20 34 21 # add another event 35 22 echo "f:myevent3 $PLACE2" >> dynamic_events 36 - 37 - grep -q $PLACE2 enabled_functions 38 - cnt=`cat enabled_functions | wc -l` 39 - if [ $cnt -ne $((ocnt + 2)) ]; then 40 - exit_fail 41 - fi 42 23 43 24 grep -q myevent1 dynamic_events 44 25 grep -q myevent2 dynamic_events ··· 27 46 test -d events/fprobes/myevent1 28 47 test -d events/fprobes/myevent2 29 48 49 + echo 1 > events/fprobes/myevent1/enable 50 + # Make sure the event is attached and is the only one 51 + grep -q $PLACE enabled_functions 52 + cnt=`cat enabled_functions | wc -l` 53 + if [ $cnt -ne $((ocnt + 1)) ]; then 54 + exit_fail 55 + fi 56 + 57 + echo 1 > events/fprobes/myevent2/enable 58 + # It should till be the only attached function 59 + cnt=`cat enabled_functions | wc -l` 60 + if [ $cnt -ne $((ocnt + 1)) ]; then 61 + exit_fail 62 + fi 63 + 64 + echo 1 > events/fprobes/myevent3/enable 65 + # If the function is different, the attached function should be increased 66 + grep -q $PLACE2 enabled_functions 67 + cnt=`cat enabled_functions | wc -l` 68 + if [ $cnt -ne $((ocnt + 2)) ]; then 69 + exit_fail 70 + fi 71 + 72 + echo 0 > events/fprobes/myevent2/enable 30 73 echo "-:myevent2" >> dynamic_events 31 74 32 75 grep -q myevent1 dynamic_events ··· 62 57 exit_fail 63 58 fi 64 59 60 + echo 0 > events/fprobes/enable 65 61 echo > dynamic_events 66 62 67 63 # Should have none left ··· 73 67 74 68 echo "f:myevent4 $PLACE" >> dynamic_events 75 69 70 + echo 1 > events/fprobes/myevent4/enable 76 
71 # Should only have one enabled 77 72 cnt=`cat enabled_functions | wc -l` 78 73 if [ $cnt -ne $((ocnt + 1)) ]; then 79 74 exit_fail 80 75 fi 81 76 77 + echo 0 > events/fprobes/enable 82 78 echo > dynamic_events 83 79 84 80 # Should have none left