Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'trace-v4.15-rc9' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace

Pull tracing fixes from Steven Rostedt:
"With the new ORC unwinder, ftrace stack tracing became dysfunctional in two ways.

One was that ORC didn't know how to handle the ftrace callbacks in
general (which Josh fixed).

The other was that ORC would just bail if it hit a dynamically
allocated trampoline, which means all ftrace stack tracing that
happens from the function tracer would produce no results (that
includes killing the max stack size tracer). I added a check to the
ORC unwinder to see if the trampoline belonged to ftrace, and if it
did, use the orc entry of the static trampoline that was used to
create the dynamic one (it would be identical).

Finally, I noticed that the skip values of the stack tracing were out
of whack. I went through and fixed them up."

* tag 'trace-v4.15-rc9' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace:
tracing: Update stack trace skipping for ORC unwinder
ftrace, orc, x86: Handle ftrace dynamically allocated trampolines
x86/ftrace: Fix ORC unwinding from ftrace handlers

+155 -55
+4 -1
arch/x86/kernel/Makefile
··· 29 29 KASAN_SANITIZE_paravirt.o := n 30 30 31 31 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y 32 - OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y 33 32 OBJECT_FILES_NON_STANDARD_test_nx.o := y 34 33 OBJECT_FILES_NON_STANDARD_paravirt_patch_$(BITS).o := y 34 + 35 + ifdef CONFIG_FRAME_POINTER 36 + OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y 37 + endif 35 38 36 39 # If instrumentation of this dir is enabled, boot hangs during first second. 37 40 # Probably could be more selective here, but note that files related to irqs,
+15 -9
arch/x86/kernel/ftrace_64.S
··· 8 8 #include <asm/ftrace.h> 9 9 #include <asm/export.h> 10 10 #include <asm/nospec-branch.h> 11 + #include <asm/unwind_hints.h> 11 12 12 13 .code64 13 14 .section .entry.text, "ax" ··· 21 20 EXPORT_SYMBOL(mcount) 22 21 #endif 23 22 24 - /* All cases save the original rbp (8 bytes) */ 25 23 #ifdef CONFIG_FRAME_POINTER 26 24 # ifdef CC_USING_FENTRY 27 25 /* Save parent and function stack frames (rip and rbp) */ ··· 31 31 # endif 32 32 #else 33 33 /* No need to save a stack frame */ 34 - # define MCOUNT_FRAME_SIZE 8 34 + # define MCOUNT_FRAME_SIZE 0 35 35 #endif /* CONFIG_FRAME_POINTER */ 36 36 37 37 /* Size of stack used to save mcount regs in save_mcount_regs */ ··· 64 64 */ 65 65 .macro save_mcount_regs added=0 66 66 67 - /* Always save the original rbp */ 67 + #ifdef CONFIG_FRAME_POINTER 68 + /* Save the original rbp */ 68 69 pushq %rbp 69 70 70 - #ifdef CONFIG_FRAME_POINTER 71 71 /* 72 72 * Stack traces will stop at the ftrace trampoline if the frame pointer 73 73 * is not set up properly. If fentry is used, we need to save a frame ··· 105 105 * Save the original RBP. Even though the mcount ABI does not 106 106 * require this, it helps out callers. 
107 107 */ 108 + #ifdef CONFIG_FRAME_POINTER 108 109 movq MCOUNT_REG_SIZE-8(%rsp), %rdx 110 + #else 111 + movq %rbp, %rdx 112 + #endif 109 113 movq %rdx, RBP(%rsp) 110 114 111 115 /* Copy the parent address into %rsi (second parameter) */ ··· 152 148 153 149 ENTRY(function_hook) 154 150 retq 155 - END(function_hook) 151 + ENDPROC(function_hook) 156 152 157 153 ENTRY(ftrace_caller) 158 154 /* save_mcount_regs fills in first two parameters */ ··· 188 184 /* This is weak to keep gas from relaxing the jumps */ 189 185 WEAK(ftrace_stub) 190 186 retq 191 - END(ftrace_caller) 187 + ENDPROC(ftrace_caller) 192 188 193 189 ENTRY(ftrace_regs_caller) 194 190 /* Save the current flags before any operations that can change them */ ··· 259 255 260 256 jmp ftrace_epilogue 261 257 262 - END(ftrace_regs_caller) 258 + ENDPROC(ftrace_regs_caller) 263 259 264 260 265 261 #else /* ! CONFIG_DYNAMIC_FTRACE */ ··· 317 313 restore_mcount_regs 318 314 319 315 retq 320 - END(ftrace_graph_caller) 316 + ENDPROC(ftrace_graph_caller) 321 317 322 - GLOBAL(return_to_handler) 318 + ENTRY(return_to_handler) 319 + UNWIND_HINT_EMPTY 323 320 subq $24, %rsp 324 321 325 322 /* Save the return values */ ··· 335 330 movq (%rsp), %rax 336 331 addq $24, %rsp 337 332 JMP_NOSPEC %rdi 333 + END(return_to_handler) 338 334 #endif
+47 -1
arch/x86/kernel/unwind_orc.c
··· 74 74 } 75 75 #endif 76 76 77 + #ifdef CONFIG_DYNAMIC_FTRACE 78 + static struct orc_entry *orc_find(unsigned long ip); 79 + 80 + /* 81 + * Ftrace dynamic trampolines do not have orc entries of their own. 82 + * But they are copies of the ftrace entries that are static and 83 + * defined in ftrace_*.S, which do have orc entries. 84 + * 85 + * If the undwinder comes across a ftrace trampoline, then find the 86 + * ftrace function that was used to create it, and use that ftrace 87 + * function's orc entrie, as the placement of the return code in 88 + * the stack will be identical. 89 + */ 90 + static struct orc_entry *orc_ftrace_find(unsigned long ip) 91 + { 92 + struct ftrace_ops *ops; 93 + unsigned long caller; 94 + 95 + ops = ftrace_ops_trampoline(ip); 96 + if (!ops) 97 + return NULL; 98 + 99 + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) 100 + caller = (unsigned long)ftrace_regs_call; 101 + else 102 + caller = (unsigned long)ftrace_call; 103 + 104 + /* Prevent unlikely recursion */ 105 + if (ip == caller) 106 + return NULL; 107 + 108 + return orc_find(caller); 109 + } 110 + #else 111 + static struct orc_entry *orc_ftrace_find(unsigned long ip) 112 + { 113 + return NULL; 114 + } 115 + #endif 116 + 77 117 static struct orc_entry *orc_find(unsigned long ip) 78 118 { 119 + static struct orc_entry *orc; 120 + 79 121 if (!orc_init) 80 122 return NULL; 81 123 ··· 153 111 __stop_orc_unwind_ip - __start_orc_unwind_ip, ip); 154 112 155 113 /* Module lookup: */ 156 - return orc_module_find(ip); 114 + orc = orc_module_find(ip); 115 + if (orc) 116 + return orc; 117 + 118 + return orc_ftrace_find(ip); 157 119 } 158 120 159 121 static void orc_sort_swap(void *_a, void *_b, int size)
+2
include/linux/ftrace.h
··· 332 332 333 333 extern int ftrace_nr_registered_ops(void); 334 334 335 + struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr); 336 + 335 337 bool is_ftrace_trampoline(unsigned long addr); 336 338 337 339 /*
+17 -12
kernel/trace/ftrace.c
··· 1119 1119 }; 1120 1120 1121 1121 /* 1122 - * This is used by __kernel_text_address() to return true if the 1123 - * address is on a dynamically allocated trampoline that would 1124 - * not return true for either core_kernel_text() or 1125 - * is_module_text_address(). 1122 + * Used by the stack undwinder to know about dynamic ftrace trampolines. 1126 1123 */ 1127 - bool is_ftrace_trampoline(unsigned long addr) 1124 + struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr) 1128 1125 { 1129 - struct ftrace_ops *op; 1130 - bool ret = false; 1126 + struct ftrace_ops *op = NULL; 1131 1127 1132 1128 /* 1133 1129 * Some of the ops may be dynamically allocated, ··· 1140 1144 if (op->trampoline && op->trampoline_size) 1141 1145 if (addr >= op->trampoline && 1142 1146 addr < op->trampoline + op->trampoline_size) { 1143 - ret = true; 1144 - goto out; 1147 + preempt_enable_notrace(); 1148 + return op; 1145 1149 } 1146 1150 } while_for_each_ftrace_op(op); 1147 - 1148 - out: 1149 1151 preempt_enable_notrace(); 1150 1152 1151 - return ret; 1153 + return NULL; 1154 + } 1155 + 1156 + /* 1157 + * This is used by __kernel_text_address() to return true if the 1158 + * address is on a dynamically allocated trampoline that would 1159 + * not return true for either core_kernel_text() or 1160 + * is_module_text_address(). 1161 + */ 1162 + bool is_ftrace_trampoline(unsigned long addr) 1163 + { 1164 + return ftrace_ops_trampoline(addr) != NULL; 1152 1165 } 1153 1166 1154 1167 struct ftrace_page {
+20 -14
kernel/trace/trace.c
··· 2374 2374 } 2375 2375 EXPORT_SYMBOL_GPL(trace_event_buffer_commit); 2376 2376 2377 + /* 2378 + * Skip 3: 2379 + * 2380 + * trace_buffer_unlock_commit_regs() 2381 + * trace_event_buffer_commit() 2382 + * trace_event_raw_event_xxx() 2383 + */ 2384 + # define STACK_SKIP 3 2385 + 2377 2386 void trace_buffer_unlock_commit_regs(struct trace_array *tr, 2378 2387 struct ring_buffer *buffer, 2379 2388 struct ring_buffer_event *event, ··· 2392 2383 __buffer_unlock_commit(buffer, event); 2393 2384 2394 2385 /* 2395 - * If regs is not set, then skip the following callers: 2396 - * trace_buffer_unlock_commit_regs 2397 - * event_trigger_unlock_commit 2398 - * trace_event_buffer_commit 2399 - * trace_event_raw_event_sched_switch 2386 + * If regs is not set, then skip the necessary functions. 2400 2387 * Note, we can still get here via blktrace, wakeup tracer 2401 2388 * and mmiotrace, but that's ok if they lose a function or 2402 - * two. They are that meaningful. 2389 + * two. They are not that meaningful. 2403 2390 */ 2404 - ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs); 2391 + ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs); 2405 2392 ftrace_trace_userstack(buffer, flags, pc); 2406 2393 } 2407 2394 ··· 2584 2579 trace.skip = skip; 2585 2580 2586 2581 /* 2587 - * Add two, for this function and the call to save_stack_trace() 2582 + * Add one, for this function and the call to save_stack_trace() 2588 2583 * If regs is set, then these functions will not be in the way. 2589 2584 */ 2585 + #ifndef CONFIG_UNWINDER_ORC 2590 2586 if (!regs) 2591 - trace.skip += 2; 2587 + trace.skip++; 2588 + #endif 2592 2589 2593 2590 /* 2594 2591 * Since events can happen in NMIs there's no safe way to ··· 2718 2711 2719 2712 local_save_flags(flags); 2720 2713 2721 - /* 2722 - * Skip 3 more, seems to get us at the caller of 2723 - * this function. 2724 - */ 2725 - skip += 3; 2714 + #ifndef CONFIG_UNWINDER_ORC 2715 + /* Skip 1 to skip this function. 
*/ 2716 + skip++; 2717 + #endif 2726 2718 __ftrace_trace_stack(global_trace.trace_buffer.buffer, 2727 2719 flags, skip, preempt_count(), NULL); 2728 2720 }
+13 -4
kernel/trace/trace_events_trigger.c
··· 1123 1123 #endif /* CONFIG_TRACER_SNAPSHOT */ 1124 1124 1125 1125 #ifdef CONFIG_STACKTRACE 1126 - /* 1127 - * Skip 3: 1128 - * stacktrace_trigger() 1126 + #ifdef CONFIG_UNWINDER_ORC 1127 + /* Skip 2: 1129 1128 * event_triggers_post_call() 1130 1129 * trace_event_raw_event_xxx() 1131 1130 */ 1132 - #define STACK_SKIP 3 1131 + # define STACK_SKIP 2 1132 + #else 1133 + /* 1134 + * Skip 4: 1135 + * stacktrace_trigger() 1136 + * event_triggers_post_call() 1137 + * trace_event_buffer_commit() 1138 + * trace_event_raw_event_xxx() 1139 + */ 1140 + #define STACK_SKIP 4 1141 + #endif 1133 1142 1134 1143 static void 1135 1144 stacktrace_trigger(struct event_trigger_data *data, void *rec)
+37 -14
kernel/trace/trace_functions.c
··· 154 154 preempt_enable_notrace(); 155 155 } 156 156 157 + #ifdef CONFIG_UNWINDER_ORC 158 + /* 159 + * Skip 2: 160 + * 161 + * function_stack_trace_call() 162 + * ftrace_call() 163 + */ 164 + #define STACK_SKIP 2 165 + #else 166 + /* 167 + * Skip 3: 168 + * __trace_stack() 169 + * function_stack_trace_call() 170 + * ftrace_call() 171 + */ 172 + #define STACK_SKIP 3 173 + #endif 174 + 157 175 static void 158 176 function_stack_trace_call(unsigned long ip, unsigned long parent_ip, 159 177 struct ftrace_ops *op, struct pt_regs *pt_regs) ··· 198 180 if (likely(disabled == 1)) { 199 181 pc = preempt_count(); 200 182 trace_function(tr, ip, parent_ip, flags, pc); 201 - /* 202 - * skip over 5 funcs: 203 - * __ftrace_trace_stack, 204 - * __trace_stack, 205 - * function_stack_trace_call 206 - * ftrace_list_func 207 - * ftrace_call 208 - */ 209 - __trace_stack(tr, flags, 5, pc); 183 + __trace_stack(tr, flags, STACK_SKIP, pc); 210 184 } 211 185 212 186 atomic_dec(&data->disabled); ··· 377 367 tracer_tracing_off(tr); 378 368 } 379 369 370 + #ifdef CONFIG_UNWINDER_ORC 380 371 /* 381 - * Skip 4: 382 - * ftrace_stacktrace() 372 + * Skip 3: 373 + * 383 374 * function_trace_probe_call() 384 - * ftrace_ops_list_func() 375 + * ftrace_ops_assist_func() 385 376 * ftrace_call() 386 377 */ 387 - #define STACK_SKIP 4 378 + #define FTRACE_STACK_SKIP 3 379 + #else 380 + /* 381 + * Skip 5: 382 + * 383 + * __trace_stack() 384 + * ftrace_stacktrace() 385 + * function_trace_probe_call() 386 + * ftrace_ops_assist_func() 387 + * ftrace_call() 388 + */ 389 + #define FTRACE_STACK_SKIP 5 390 + #endif 388 391 389 392 static __always_inline void trace_stack(struct trace_array *tr) 390 393 { ··· 407 384 local_save_flags(flags); 408 385 pc = preempt_count(); 409 386 410 - __trace_stack(tr, flags, STACK_SKIP, pc); 387 + __trace_stack(tr, flags, FTRACE_STACK_SKIP, pc); 411 388 } 412 389 413 390 static void