Merge tag 'trace-v5.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace

Pull more tracing updates from Steven Rostedt:

- Rename the staging files to give them some meaning. Just
  stage1, stage2, etc., does not show what they are for

- Check for NULL from allocation in bootconfig

- Hold event mutex for dyn_event call in user events

- Mark user events as BROKEN (to allow reworking the API)

- Remove eBPF updates from user events

- Remove the user events header from uapi to keep it from being installed

- Make ftrace_graph_is_dead() an inline function, as it is called from
  hot paths, and convert its check into a static branch (a minimal
  sketch of the pattern follows this list)
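
A minimal sketch of the static branch pattern used here (example_key and
the helper names are hypothetical; the real change uses kill_ftrace_graph,
as the diffs below show):

    #include <linux/jump_label.h>

    /* False by default: the check below compiles to a NOP until enabled. */
    DEFINE_STATIC_KEY_FALSE(example_key);

    static inline bool example_is_dead(void)
    {
            /* Patched at runtime; no load-and-test on the fast path. */
            return static_branch_unlikely(&example_key);
    }

    void example_hot_path(void)
    {
            if (example_is_dead())
                    return;
            /* ... normal work ... */
    }

    void example_fatal_error(void)
    {
            /* Slow path: rewrites every call site to take the branch. */
            static_branch_enable(&example_key);
    }

Because the helper now lives inline in the header, callers pay neither a
function call nor a memory load while tracing is healthy.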

* tag 'trace-v5.18-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace:
tracing: Move user_events.h temporarily out of include/uapi
ftrace: Make ftrace_graph_is_dead() a static branch
tracing: Set user_events to BROKEN
tracing/user_events: Remove eBPF interfaces
tracing/user_events: Hold event_mutex during dyn_event_add
proc: bootconfig: Add null pointer check
tracing: Rename the staging files for trace_events

Documentation/trace/user_events.rst: +3 -11

···
 Overview
 --------
 User based trace events allow user processes to create events and trace data
-that can be viewed via existing tools, such as ftrace, perf and eBPF.
+that can be viewed via existing tools, such as ftrace and perf.
 To enable this feature, build your kernel with CONFIG_USER_EVENTS=y.
 
 Programs can view status of the events via
···
 
 Supported Flags
 ^^^^^^^^^^^^^^^
-**BPF_ITER** - EBPF programs attached to this event will get the raw iovec
-struct instead of any data copies for max performance.
+None yet
 
 Field Format
 ^^^^^^^^^^^^
···
 
 **EVENT_STATUS_FTRACE** - Bit set if ftrace has been attached (Bit 0).
 
-**EVENT_STATUS_PERF** - Bit set if perf/eBPF has been attached (Bit 1).
+**EVENT_STATUS_PERF** - Bit set if perf has been attached (Bit 1).
 
 Writing Data
 ------------
···
 
         writev(fd, (const struct iovec*)io, 2);
 
 **NOTE:** *The write_index is not emitted out into the trace being recorded.*
-
-EBPF
-----
-EBPF programs that attach to a user-based event tracepoint are given a pointer
-to a struct user_bpf_context. The bpf context contains the data type (which can
-be a user or kernel buffer, or can be a pointer to the iovec) and the data
-length that was emitted (minus the write_index).
 
 Example Code
 ------------
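
For orientation, a condensed userspace sketch of the write path that
remains after the eBPF removal, adapted from the example code in
user_events.rst (tracefs paths and struct user_reg layout as of this
series; treat as illustrative, and note the series also moves
user_events.h out of include/uapi, so the header must be taken from the
kernel tree rather than an installed sysroot):

    #include <errno.h>
    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <sys/uio.h>
    #include <linux/user_events.h>

    int main(void)
    {
            struct user_reg reg = {0};
            struct iovec io[2];
            __u32 count = 1;
            int fd;

            /* tracefs may also be mounted at /sys/kernel/debug/tracing */
            fd = open("/sys/kernel/tracing/user_events_data", O_RDWR);
            if (fd < 0)
                    return errno;

            /* Register "test" with a single u32 field named count. */
            reg.size = sizeof(reg);
            reg.name_args = (__u64)"test u32 count";

            if (ioctl(fd, DIAG_IOCSREG, &reg) == -1)
                    return errno;

            /* The first iovec must carry the write_index returned at
             * registration; it is consumed by the kernel and is not
             * recorded into the trace itself. */
            io[0].iov_base = &reg.write_index;
            io[0].iov_len = sizeof(reg.write_index);
            io[1].iov_base = &count;
            io[1].iov_len = sizeof(count);

            if (writev(fd, io, 2) == -1)
                    return errno;

            return 0;
    }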
fs/proc/bootconfig.c: +2

···
         int ret = 0;
 
         key = kzalloc(XBC_KEYLEN_MAX, GFP_KERNEL);
+        if (!key)
+                return -ENOMEM;
 
         xbc_for_each_key_value(leaf, val) {
                 ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX);
include/linux/ftrace.h: +15 -1

···
 
 #include <linux/trace_recursion.h>
 #include <linux/trace_clock.h>
+#include <linux/jump_label.h>
 #include <linux/kallsyms.h>
 #include <linux/linkage.h>
 #include <linux/bitops.h>
···
 extern int register_ftrace_graph(struct fgraph_ops *ops);
 extern void unregister_ftrace_graph(struct fgraph_ops *ops);
 
-extern bool ftrace_graph_is_dead(void);
+/**
+ * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
+ *
+ * ftrace_graph_stop() is called when a severe error is detected in
+ * the function graph tracing. This function is called by the critical
+ * paths of function graph to keep those paths from doing any more harm.
+ */
+DECLARE_STATIC_KEY_FALSE(kill_ftrace_graph);
+
+static inline bool ftrace_graph_is_dead(void)
+{
+        return static_branch_unlikely(&kill_ftrace_graph);
+}
+
 extern void ftrace_graph_stop(void);
 
 /* The current handlers in use */
include/linux/user_events.h: -53

···
 /* Create dynamic location entry within a 32-bit value */
 #define DYN_LOC(offset, size) ((size) << 16 | (offset))
 
-/* Use raw iterator for attached BPF program(s), no affect on ftrace/perf */
-#define FLAG_BPF_ITER (1 << 0)
-
 /*
  * Describes an event registration and stores the results of the registration.
  * This structure is passed to the DIAG_IOCSREG ioctl, callers at a minimum
···
 
 /* Requests to delete a user_event */
 #define DIAG_IOCSDEL _IOW(DIAG_IOC_MAGIC, 1, char*)
-
-/* Data type that was passed to the BPF program */
-enum {
-        /* Data resides in kernel space */
-        USER_BPF_DATA_KERNEL,
-
-        /* Data resides in user space */
-        USER_BPF_DATA_USER,
-
-        /* Data is a pointer to a user_bpf_iter structure */
-        USER_BPF_DATA_ITER,
-};
-
-/*
- * Describes an iovec iterator that BPF programs can use to access data for
- * a given user_event write() / writev() call.
- */
-struct user_bpf_iter {
-
-        /* Offset of the data within the first iovec */
-        __u32 iov_offset;
-
-        /* Number of iovec structures */
-        __u32 nr_segs;
-
-        /* Pointer to iovec structures */
-        const struct iovec *iov;
-};
-
-/* Context that BPF programs receive when attached to a user_event */
-struct user_bpf_context {
-
-        /* Data type being passed (see union below) */
-        __u32 data_type;
-
-        /* Length of the data */
-        __u32 data_len;
-
-        /* Pointer to data, varies by data type */
-        union {
-                /* Kernel data (data_type == USER_BPF_DATA_KERNEL) */
-                void *kdata;
-
-                /* User data (data_type == USER_BPF_DATA_USER) */
-                void *udata;
-
-                /* Direct iovec (data_type == USER_BPF_DATA_ITER) */
-                struct user_bpf_iter *iter;
-        };
-};
 
 #endif /* _UAPI_LINUX_USER_EVENTS_H */
include/trace/stages/{stage1_defines.h => stage1_struct_define.h}
include/trace/stages/{stage2_defines.h => stage2_data_offsets.h}
include/trace/stages/{stage3_defines.h => stage3_trace_output.h}
include/trace/stages/{stage4_defines.h => stage4_event_fields.h}
include/trace/stages/{stage5_defines.h => stage5_get_offsets.h}
include/trace/stages/{stage6_defines.h => stage6_event_callback.h}
include/trace/stages/{stage7_defines.h => stage7_class_define.h}
include/trace/trace_custom_events.h: +7 -7

···
 
 /* Stage 1 creates the structure of the recorded event layout */
 
-#include "stages/stage1_defines.h"
+#include "stages/stage1_struct_define.h"
 
 #undef DECLARE_CUSTOM_EVENT_CLASS
 #define DECLARE_CUSTOM_EVENT_CLASS(name, proto, args, tstruct, assign, print) \
···
 
 /* Stage 2 creates the custom class */
 
-#include "stages/stage2_defines.h"
+#include "stages/stage2_data_offsets.h"
 
 #undef DECLARE_CUSTOM_EVENT_CLASS
 #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
···
 
 /* Stage 3 create the way to print the custom event */
 
-#include "stages/stage3_defines.h"
+#include "stages/stage3_trace_output.h"
 
 #undef DECLARE_CUSTOM_EVENT_CLASS
 #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
···
 
 /* Stage 4 creates the offset layout for the fields */
 
-#include "stages/stage4_defines.h"
+#include "stages/stage4_event_fields.h"
 
 #undef DECLARE_CUSTOM_EVENT_CLASS
 #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, func, print) \
···
 
 /* Stage 5 creates the helper function for dynamic fields */
 
-#include "stages/stage5_defines.h"
+#include "stages/stage5_get_offsets.h"
 
 #undef DECLARE_CUSTOM_EVENT_CLASS
 #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
···
 
 /* Stage 6 creates the probe function that records the event */
 
-#include "stages/stage6_defines.h"
+#include "stages/stage6_event_callback.h"
 
 #undef DECLARE_CUSTOM_EVENT_CLASS
 #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
···
 
 /* Stage 7 creates the actual class and event structure for the custom event */
 
-#include "stages/stage7_defines.h"
+#include "stages/stage7_class_define.h"
 
 #undef DECLARE_CUSTOM_EVENT_CLASS
 #define DECLARE_CUSTOM_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
include/trace/trace_events.h: +7 -7

···
                              PARAMS(print)); \
         DEFINE_EVENT(name, name, PARAMS(proto), PARAMS(args));
 
-#include "stages/stage1_defines.h"
+#include "stages/stage1_struct_define.h"
 
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) \
···
  * The size of an array is also encoded, in the higher 16 bits of <item>.
  */
 
-#include "stages/stage2_defines.h"
+#include "stages/stage2_data_offsets.h"
 
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
···
  * in binary.
  */
 
-#include "stages/stage3_defines.h"
+#include "stages/stage3_trace_output.h"
 
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
···
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-#include "stages/stage4_defines.h"
+#include "stages/stage4_event_fields.h"
 
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, func, print) \
···
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-#include "stages/stage5_defines.h"
+#include "stages/stage5_get_offsets.h"
 
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
···
 #define _TRACE_PERF_INIT(call)
 #endif /* CONFIG_PERF_EVENTS */
 
-#include "stages/stage6_defines.h"
+#include "stages/stage6_event_callback.h"
 
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
···
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
-#include "stages/stage7_defines.h"
+#include "stages/stage7_class_define.h"
 
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print) \
kernel/trace/fgraph.c: +3 -14

···
  *
  * Highly modified by Steven Rostedt (VMware).
  */
+#include <linux/jump_label.h>
 #include <linux/suspend.h>
 #include <linux/ftrace.h>
 #include <linux/slab.h>
···
 #define ASSIGN_OPS_HASH(opsname, val)
 #endif
 
-static bool kill_ftrace_graph;
+DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph);
 int ftrace_graph_active;
 
 /* Both enabled by default (can be cleared by function_graph tracer flags */
 static bool fgraph_sleep_time = true;
-
-/**
- * ftrace_graph_is_dead - returns true if ftrace_graph_stop() was called
- *
- * ftrace_graph_stop() is called when a severe error is detected in
- * the function graph tracing. This function is called by the critical
- * paths of function graph to keep those paths from doing any more harm.
- */
-bool ftrace_graph_is_dead(void)
-{
-        return kill_ftrace_graph;
-}
 
 /**
  * ftrace_graph_stop - set to permanently disable function graph tracing
···
  */
 void ftrace_graph_stop(void)
 {
-        kill_ftrace_graph = true;
+        static_branch_enable(&kill_ftrace_graph);
 }
 
 /* Add a function return address to the trace stack on thread info.*/
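
To show what the conversion buys on the hot path, here is a hypothetical
caller (the real callers live in fgraph.c and the architectures' function
graph entry code): the check that used to be an out-of-line call into
fgraph.c now compiles down to a patched NOP at each call site.

    /* Hypothetical entry handler; not the kernel's actual callback. */
    static int example_graph_entry(struct ftrace_graph_ent *trace)
    {
            /* Until ftrace_graph_stop() flips the static key, this
             * costs one NOP instead of a call plus a load-and-test. */
            if (ftrace_graph_is_dead())
                    return 0;

            /* ... record the function entry ... */
            return 1;
    }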
kernel/trace/trace_events_user.c: +7 -74

···
 #define MAX_FIELD_ARRAY_SIZE 1024
 #define MAX_FIELD_ARG_NAME 256
 
-#define MAX_BPF_COPY_SIZE PAGE_SIZE
-#define MAX_STACK_BPF_DATA 512
-
 static char *register_page_data;
 
 static DEFINE_MUTEX(reg_mutex);
···
                               type[0] != 'u', FILTER_OTHER);
 }
 
-static void user_event_parse_flags(struct user_event *user, char *flags)
-{
-        char *flag;
-
-        if (flags == NULL)
-                return;
-
-        while ((flag = strsep(&flags, ",")) != NULL) {
-                if (strcmp(flag, "BPF_ITER") == 0)
-                        user->flags |= FLAG_BPF_ITER;
-        }
-}
-
 static int user_event_parse_fields(struct user_event *user, char *args)
 {
         char *field;
···
 }
 
 #ifdef CONFIG_PERF_EVENTS
-static void user_event_bpf(struct user_event *user, struct iov_iter *i)
-{
-        struct user_bpf_context context;
-        struct user_bpf_iter bpf_i;
-        char fast_data[MAX_STACK_BPF_DATA];
-        void *temp = NULL;
-
-        if ((user->flags & FLAG_BPF_ITER) && iter_is_iovec(i)) {
-                /* Raw iterator */
-                context.data_type = USER_BPF_DATA_ITER;
-                context.data_len = i->count;
-                context.iter = &bpf_i;
-
-                bpf_i.iov_offset = i->iov_offset;
-                bpf_i.iov = i->iov;
-                bpf_i.nr_segs = i->nr_segs;
-        } else if (i->nr_segs == 1 && iter_is_iovec(i)) {
-                /* Single buffer from user */
-                context.data_type = USER_BPF_DATA_USER;
-                context.data_len = i->count;
-                context.udata = i->iov->iov_base + i->iov_offset;
-        } else {
-                /* Multi buffer from user */
-                struct iov_iter copy = *i;
-                size_t copy_size = min_t(size_t, i->count, MAX_BPF_COPY_SIZE);
-
-                context.data_type = USER_BPF_DATA_KERNEL;
-                context.kdata = fast_data;
-
-                if (unlikely(copy_size > sizeof(fast_data))) {
-                        temp = kmalloc(copy_size, GFP_NOWAIT);
-
-                        if (temp)
-                                context.kdata = temp;
-                        else
-                                copy_size = sizeof(fast_data);
-                }
-
-                context.data_len = copy_nofault(context.kdata,
-                                                copy_size, &copy);
-        }
-
-        trace_call_bpf(&user->call, &context);
-
-        kfree(temp);
-}
-
 /*
- * Writes the user supplied payload out to perf ring buffer or eBPF program.
+ * Writes the user supplied payload out to perf ring buffer.
  */
 static void user_event_perf(struct user_event *user, struct iov_iter *i,
                             void *tpdata, bool *faulted)
 {
         struct hlist_head *perf_head;
-
-        if (bpf_prog_array_valid(&user->call))
-                user_event_bpf(user, i);
 
         perf_head = this_cpu_ptr(user->call.perf_events);
···
 
         user->tracepoint.name = name;
 
-        user_event_parse_flags(user, flags);
-
         ret = user_event_parse_fields(user, args);
 
         if (ret)
···
 #endif
 
         mutex_lock(&event_mutex);
+
         ret = user_event_trace_register(user);
-        mutex_unlock(&event_mutex);
 
         if (ret)
-                goto put_user;
+                goto put_user_lock;
 
         user->index = index;
···
         set_bit(user->index, page_bitmap);
         hash_add(register_table, &user->node, key);
 
+        mutex_unlock(&event_mutex);
+
         *newuser = user;
         return 0;
+put_user_lock:
+        mutex_unlock(&event_mutex);
 put_user:
         user_event_destroy_fields(user);
         user_event_destroy_validators(user);
···
                 seq_puts(m, " other");
                 busy++;
         }
-
-        if (flags & FLAG_BPF_ITER)
-                seq_puts(m, " FLAG:BPF_ITER");
 
         seq_puts(m, "\n");
         active++;
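
The event_mutex change above follows the usual publish-under-lock shape;
a condensed sketch with hypothetical helper names (register_event() and
publish_event() stand in for user_event_trace_register() and the
bitmap/hash-table updates), assuming the dyn_event create/delete paths
also run under event_mutex:

    static int example_register(struct user_event *user)
    {
            int ret;

            mutex_lock(&event_mutex);

            ret = register_event(user);
            if (!ret)
                    publish_event(user);

            mutex_unlock(&event_mutex);

            return ret;
    }

Keeping registration and publication in one critical section means a
concurrent dyn_event operation can never observe an event that is
registered but not yet in the lookup table.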