Merge remote-tracking branch 'rostedt/tip/perf/urgent-2' into x86-urgent-for-linus

Changed files: +154 -16

arch/x86/include/asm/ftrace.h | +1 -1
···

 #ifndef __ASSEMBLY__
 extern void mcount(void);
-extern int modifying_ftrace_code;
+extern atomic_t modifying_ftrace_code;

 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {

arch/x86/kernel/cpu/common.c | +7 -1
···
                addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
 }

+static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
+
 void debug_stack_set_zero(void)
 {
+        this_cpu_inc(debug_stack_use_ctr);
         load_idt((const struct desc_ptr *)&nmi_idt_descr);
 }

 void debug_stack_reset(void)
 {
-        load_idt((const struct desc_ptr *)&idt_descr);
+        if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
+                return;
+        if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
+                load_idt((const struct desc_ptr *)&idt_descr);
 }

 #else /* CONFIG_X86_64 */
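
For reference, a minimal userspace sketch of the counting pattern this hunk introduces (illustrative only: fake_idt stands in for the IDT, and the counter is per CPU in the kernel). The IDT is swapped to the zero-IST copy on the first set, nested calls only bump the counter, and the normal IDT comes back when the last user resets:

#include <stdio.h>

static const char *fake_idt = "normal_idt";
static unsigned int debug_stack_use_ctr;        /* per-CPU in the kernel */

static void debug_stack_set_zero(void)
{
        debug_stack_use_ctr++;
        fake_idt = "zeroed_idt";        /* load_idt(&nmi_idt_descr) */
}

static void debug_stack_reset(void)
{
        if (!debug_stack_use_ctr)       /* the kernel WARN_ONs here */
                return;
        if (--debug_stack_use_ctr == 0)
                fake_idt = "normal_idt";        /* load_idt(&idt_descr) */
}

int main(void)
{
        debug_stack_set_zero();         /* outer user, e.g. the NMI path */
        debug_stack_set_zero();         /* nested user, e.g. TRACE_IRQS_OFF_DEBUG */
        debug_stack_reset();
        printf("after inner reset: %s\n", fake_idt);    /* still zeroed_idt */
        debug_stack_reset();
        printf("after outer reset: %s\n", fake_idt);    /* normal_idt again */
        return 0;
}
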

arch/x86/kernel/entry_64.S | +41 -3
···
 .endm

 /*
+ * When dynamic function tracer is enabled it will add a breakpoint
+ * to all locations that it is about to modify, sync CPUs, update
+ * all the code, sync CPUs, then remove the breakpoints. In this time
+ * if lockdep is enabled, it might jump back into the debug handler
+ * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
+ *
+ * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
+ * make sure the stack pointer does not get reset back to the top
+ * of the debug stack, and instead just reuses the current stack.
+ */
+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
+
+.macro TRACE_IRQS_OFF_DEBUG
+        call debug_stack_set_zero
+        TRACE_IRQS_OFF
+        call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_ON_DEBUG
+        call debug_stack_set_zero
+        TRACE_IRQS_ON
+        call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
+        bt $9,EFLAGS-\offset(%rsp)      /* interrupts off? */
+        jnc 1f
+        TRACE_IRQS_ON_DEBUG
+1:
+.endm
+
+#else
+# define TRACE_IRQS_OFF_DEBUG           TRACE_IRQS_OFF
+# define TRACE_IRQS_ON_DEBUG            TRACE_IRQS_ON
+# define TRACE_IRQS_IRETQ_DEBUG         TRACE_IRQS_IRETQ
+#endif
+
+/*
  * C code is not supposed to know about undefined top of stack. Every time
  * a C function with an pt_regs argument is called from the SYSCALL based
  * fast path FIXUP_TOP_OF_STACK is needed.
···
         subq $ORIG_RAX-R15, %rsp
         CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
         call save_paranoid
-        TRACE_IRQS_OFF
+        TRACE_IRQS_OFF_DEBUG
         movq %rsp,%rdi          /* pt_regs pointer */
         xorl %esi,%esi          /* no error code */
         subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
···
 ENTRY(paranoid_exit)
         DEFAULT_FRAME
         DISABLE_INTERRUPTS(CLBR_NONE)
-        TRACE_IRQS_OFF
+        TRACE_IRQS_OFF_DEBUG
         testl %ebx,%ebx         /* swapgs needed? */
         jnz paranoid_restore
         testl $3,CS(%rsp)
···
         RESTORE_ALL 8
         jmp irq_return
 paranoid_restore:
-        TRACE_IRQS_IRETQ 0
+        TRACE_IRQS_IRETQ_DEBUG 0
         RESTORE_ALL 8
         jmp irq_return
 paranoid_userspace:
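
A rough C rendering of what these assembler macros do may help (a sketch with stand-in names: trace_hardirqs_on() approximates what TRACE_IRQS_ON expands to, the stubs only print, and the EFLAGS test mirrors the "bt $9" above):

#include <stdio.h>

#define X86_EFLAGS_IF 0x200UL   /* bit 9 of EFLAGS: interrupts enabled */

static void debug_stack_set_zero(void) { puts("  IDT -> zero-IST copy"); }
static void debug_stack_reset(void)    { puts("  IDT -> normal"); }
static void trace_hardirqs_on(void)    { puts("  lockdep: irqs on"); }

/* TRACE_IRQS_ON_DEBUG: run the breakpoint-prone lockdep hook with the
 * zero-IST IDT installed, so a nested #DB reuses the current stack. */
static void trace_irqs_on_debug(void)
{
        debug_stack_set_zero();
        trace_hardirqs_on();
        debug_stack_reset();
}

/* TRACE_IRQS_IRETQ_DEBUG: the "bt $9, EFLAGS(%rsp); jnc 1f" pair,
 * i.e. only trace if the interrupted context had interrupts enabled. */
static void trace_irqs_iretq_debug(unsigned long saved_eflags)
{
        if (saved_eflags & X86_EFLAGS_IF)
                trace_irqs_on_debug();
}

int main(void)
{
        puts("returning with interrupts enabled:");
        trace_irqs_iretq_debug(X86_EFLAGS_IF);  /* traces */
        puts("returning with interrupts disabled:");
        trace_irqs_iretq_debug(0);              /* skipped */
        return 0;
}
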

arch/x86/kernel/ftrace.c | +95 -7
···
 }

 static int
-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
                    unsigned const char *new_code)
 {
         unsigned char replaced[MCOUNT_INSN_SIZE];
···
         old = ftrace_call_replace(ip, addr);
         new = ftrace_nop_replace();

-        return ftrace_modify_code(rec->ip, old, new);
+        /*
+         * On boot up, and when modules are loaded, the MCOUNT_ADDR
+         * is converted to a nop, and will never become MCOUNT_ADDR
+         * again. This code is either running before SMP (on boot up)
+         * or before the code will ever be executed (module load).
+         * We do not want to use the breakpoint version in this case,
+         * just modify the code directly.
+         */
+        if (addr == MCOUNT_ADDR)
+                return ftrace_modify_code_direct(rec->ip, old, new);
+
+        /* Normal cases use add_brk_on_nop */
+        WARN_ONCE(1, "invalid use of ftrace_make_nop");
+        return -EINVAL;
 }

 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
···
         old = ftrace_nop_replace();
         new = ftrace_call_replace(ip, addr);

-        return ftrace_modify_code(rec->ip, old, new);
+        /* Should only be called when module is loaded */
+        return ftrace_modify_code_direct(rec->ip, old, new);
 }
+
+/*
+ * The modifying_ftrace_code is used to tell the breakpoint
+ * handler to call ftrace_int3_handler(). If it fails to
+ * call this handler for a breakpoint added by ftrace, then
+ * the kernel may crash.
+ *
+ * As atomic_writes on x86 do not need a barrier, we do not
+ * need to add smp_mb()s for this to work. It is also considered
+ * that we can not read the modifying_ftrace_code before
+ * executing the breakpoint. That would be quite remarkable if
+ * it could do that. Here's the flow that is required:
+ *
+ *   CPU-0                          CPU-1
+ *
+ *   atomic_inc(mfc);
+ *   write int3s
+ *                                  <trap-int3> // implicit (r)mb
+ *                                  if (atomic_read(mfc))
+ *                                          call ftrace_int3_handler()
+ *
+ * Then when we are finished:
+ *
+ *   atomic_dec(mfc);
+ *
+ * If we hit a breakpoint that was not set by ftrace, it does not
+ * matter if ftrace_int3_handler() is called or not. It will
+ * simply be ignored. But it is crucial that a ftrace nop/caller
+ * breakpoint is handled. No other user should ever place a
+ * breakpoint on an ftrace nop/caller location. It must only
+ * be done by this code.
+ */
+atomic_t modifying_ftrace_code __read_mostly;
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+                   unsigned const char *new_code);

 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
···
         memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
         new = ftrace_call_replace(ip, (unsigned long)func);
+
+        /* See comment above by declaration of modifying_ftrace_code */
+        atomic_inc(&modifying_ftrace_code);
+
         ret = ftrace_modify_code(ip, old, new);
+
+        atomic_dec(&modifying_ftrace_code);

         return ret;
 }
-
-int modifying_ftrace_code __read_mostly;

 /*
  * A breakpoint was added to the code address we are about to
···
         }
 }

+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+                   unsigned const char *new_code)
+{
+        int ret;
+
+        ret = add_break(ip, old_code);
+        if (ret)
+                goto out;
+
+        run_sync();
+
+        ret = add_update_code(ip, new_code);
+        if (ret)
+                goto fail_update;
+
+        run_sync();
+
+        ret = ftrace_write(ip, new_code, 1);
+        if (ret) {
+                ret = -EPERM;
+                goto out;
+        }
+        run_sync();
+ out:
+        return ret;
+
+ fail_update:
+        probe_kernel_write((void *)ip, &old_code[0], 1);
+        goto out;
+}
+
 void arch_ftrace_update_code(int command)
 {
-        modifying_ftrace_code++;
+        /* See comment above by declaration of modifying_ftrace_code */
+        atomic_inc(&modifying_ftrace_code);

         ftrace_modify_all_code(command);

-        modifying_ftrace_code--;
+        atomic_dec(&modifying_ftrace_code);
 }

 int __init ftrace_dyn_arch_init(void *data)
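
The heart of the change is the three-step int3 protocol in ftrace_modify_code(). A userspace simulation of that sequence, patching a byte buffer instead of kernel text (all names here are illustrative: sync_cores() stands in for run_sync()'s IPI-based serialization, and the buffer writes for probe_kernel_write()/ftrace_write()):

#include <stdio.h>
#include <string.h>

#define INSN_SIZE 5                     /* size of a call/nop patch site */
static unsigned char text[INSN_SIZE];   /* the "kernel text" being patched */

static void sync_cores(void)
{
        /* kernel: run_sync() IPIs every CPU so all of them serialize
         * and are guaranteed to observe the bytes written so far */
}

static void patch_site(const unsigned char *new_insn)
{
        /* 1) breakpoint the first byte: any CPU executing the site now
         *    traps into int3 (handled by ftrace_int3_handler()) and
         *    skips the instruction, instead of running a half-written
         *    mix of old and new bytes */
        text[0] = 0xcc;
        sync_cores();

        /* 2) with everyone trapping, rewrite the tail of the insn */
        memcpy(text + 1, new_insn + 1, INSN_SIZE - 1);
        sync_cores();

        /* 3) replace the int3 with the new first byte; the site is
         *    whole again and was never executable in a torn state */
        text[0] = new_insn[0];
        sync_cores();
}

int main(void)
{
        const unsigned char nop5[INSN_SIZE]  = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
        const unsigned char call5[INSN_SIZE] = { 0xe8, 0x12, 0x34, 0x56, 0x78 };

        memcpy(text, nop5, INSN_SIZE);
        patch_site(call5);              /* nop -> call, never half-patched */
        printf("first byte now: %#x\n", text[0]);
        return 0;
}
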

arch/x86/kernel/nmi.c | +4 -2
···
         */
        if (unlikely(is_debug_stack(regs->sp))) {
                debug_stack_set_zero();
-               __get_cpu_var(update_debug_stack) = 1;
+               this_cpu_write(update_debug_stack, 1);
        }
 }

 static inline void nmi_nesting_postprocess(void)
 {
-        if (unlikely(__get_cpu_var(update_debug_stack)))
+        if (unlikely(this_cpu_read(update_debug_stack))) {
                 debug_stack_reset();
+                this_cpu_write(update_debug_stack, 0);
+        }
 }
 #endif

arch/x86/kernel/traps.c | +6 -2
···
 dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
 {
 #ifdef CONFIG_DYNAMIC_FTRACE
-        /* ftrace must be first, everything else may cause a recursive crash */
-        if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
+        /*
+         * ftrace must be first, everything else may cause a recursive crash.
+         * See note by declaration of modifying_ftrace_code in ftrace.c
+         */
+        if (unlikely(atomic_read(&modifying_ftrace_code)) &&
+            ftrace_int3_handler(regs))
                 return;
 #endif
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
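
A small sketch of the ordering constraint this hunk encodes (an illustrative userspace rendering, not the kernel function): the ftrace check must run before every other int3 user, because while code is being live-patched any of those handlers could themselves hit a site that is mid-patch and recurse.

#include <stdbool.h>
#include <stdio.h>

static int modifying_ftrace_code;       /* atomic_t in the kernel */

static bool ftrace_int3_handler(unsigned long ip)
{
        /* kernel: if ip is a site being patched, fix up regs->ip to
         * step over it; here a magic address stands in for that test */
        return ip == 0x1000;
}

static void do_int3(unsigned long ip)
{
        if (modifying_ftrace_code && ftrace_int3_handler(ip))
                return;                 /* ftrace-owned breakpoint: done */
        printf("not ftrace's: fall through to kgdb/kprobes/...\n");
}

int main(void)
{
        modifying_ftrace_code = 1;      /* patching in progress */
        do_int3(0x1000);                /* consumed silently by ftrace */
        do_int3(0x2000);                /* handled by the normal chain */
        return 0;
}
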