Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull more perf updates from Ingo Molnar:
"A second round of perf updates:

- wide reaching kprobes sanitization and robustization, with the hope
of fixing all 'probe this function crashes the kernel' bugs, by
Masami Hiramatsu.

- uprobes updates from Oleg Nesterov: tmpfs support, corner case
fixes and robustization work.

- perf tooling updates and fixes from Jiri Olsa, Namhyung Kim, Arnaldo
et al:
* Add support to accumulate hist periods (Namhyung Kim)
* various fixes, refactorings and enhancements"

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (101 commits)
perf: Differentiate exec() and non-exec() comm events
perf: Fix perf_event_comm() vs. exec() assumption
uprobes/x86: Rename arch_uprobe->def to ->defparam, minor comment updates
perf/documentation: Add description for conditional branch filter
perf/x86: Add conditional branch filtering support
perf/tool: Add conditional branch filter 'cond' to perf record
perf: Add new conditional branch filter 'PERF_SAMPLE_BRANCH_COND'
uprobes: Teach copy_insn() to support tmpfs
uprobes: Shift ->readpage check from __copy_insn() to uprobe_register()
perf/x86: Use common PMU interrupt disabled code
perf/ARM: Use common PMU interrupt disabled code
perf: Disable sampled events if no PMU interrupt
perf: Fix use after free in perf_remove_from_context()
perf tools: Fix 'make help' message error
perf record: Fix poll return value propagation
perf tools: Move elide bool into perf_hpp_fmt struct
perf tools: Remove elide setup for SORT_MODE__MEMORY mode
perf tools: Fix "==" into "=" in ui_browser__warning assignment
perf tools: Allow overriding sysfs and proc finding with env var
perf tools: Consider header files outside perf directory in tags target
...

+2993 -1196
+15 -1
Documentation/kprobes.txt
··· 22 22 23 23 Kprobes enables you to dynamically break into any kernel routine and 24 24 collect debugging and performance information non-disruptively. You 25 - can trap at almost any kernel code address, specifying a handler 25 + can trap at almost any kernel code address(*), specifying a handler 26 26 routine to be invoked when the breakpoint is hit. 27 + (*: some parts of the kernel code can not be trapped, see 1.5 Blacklist) 27 28 28 29 There are currently three types of probes: kprobes, jprobes, and 29 30 kretprobes (also called return probes). A kprobe can be inserted ··· 273 272 - Specify an empty function for the kprobe's post_handler or break_handler. 274 273 or 275 274 - Execute 'sysctl -w debug.kprobes_optimization=n' 275 + 276 + 1.5 Blacklist 277 + 278 + Kprobes can probe most of the kernel except itself. This means 279 + that there are some functions where kprobes cannot probe. Probing 280 + (trapping) such functions can cause a recursive trap (e.g. double 281 + fault) or the nested probe handler may never be called. 282 + Kprobes manages such functions as a blacklist. 283 + If you want to add a function into the blacklist, you just need 284 + to (1) include linux/kprobes.h and (2) use NOKPROBE_SYMBOL() macro 285 + to specify a blacklisted function. 286 + Kprobes checks the given probe address against the blacklist and 287 + rejects registering it, if the given address is in the blacklist. 276 288 277 289 2. Architectures Supported 278 290
+1 -1
arch/arm/kernel/perf_event.c
··· 410 410 */ 411 411 hwc->config_base |= (unsigned long)mapping; 412 412 413 - if (!hwc->sample_period) { 413 + if (!is_sampling_event(event)) { 414 414 /* 415 415 * For non-sampling runs, limit the sample_period to half 416 416 * of the counter width. That way, the new counter value
+6 -2
arch/arm/kernel/perf_event_cpu.c
··· 126 126 127 127 irqs = min(pmu_device->num_resources, num_possible_cpus()); 128 128 if (irqs < 1) { 129 - pr_err("no irqs for PMUs defined\n"); 130 - return -ENODEV; 129 + printk_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); 130 + return 0; 131 131 } 132 132 133 133 irq = platform_get_irq(pmu_device, 0); ··· 191 191 /* Ensure the PMU has sane values out of reset. */ 192 192 if (cpu_pmu->reset) 193 193 on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); 194 + 195 + /* If no interrupts available, set the corresponding capability flag */ 196 + if (!platform_get_irq(cpu_pmu->plat_device, 0)) 197 + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; 194 198 } 195 199 196 200 /*
+7
arch/x86/include/asm/asm.h
··· 57 57 .long (from) - . ; \ 58 58 .long (to) - . + 0x7ffffff0 ; \ 59 59 .popsection 60 + 61 + # define _ASM_NOKPROBE(entry) \ 62 + .pushsection "_kprobe_blacklist","aw" ; \ 63 + _ASM_ALIGN ; \ 64 + _ASM_PTR (entry); \ 65 + .popsection 60 66 #else 61 67 # define _ASM_EXTABLE(from,to) \ 62 68 " .pushsection \"__ex_table\",\"a\"\n" \ ··· 77 71 " .long (" #from ") - .\n" \ 78 72 " .long (" #to ") - . + 0x7ffffff0\n" \ 79 73 " .popsection\n" 74 + /* For C file, we already have NOKPROBE_SYMBOL macro */ 80 75 #endif 81 76 82 77 #endif /* _ASM_X86_ASM_H */
+2
arch/x86/include/asm/kprobes.h
··· 116 116 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); 117 117 extern int kprobe_exceptions_notify(struct notifier_block *self, 118 118 unsigned long val, void *data); 119 + extern int kprobe_int3_handler(struct pt_regs *regs); 120 + extern int kprobe_debug_handler(struct pt_regs *regs); 119 121 #endif /* _ASM_X86_KPROBES_H */
+1 -2
arch/x86/include/asm/traps.h
··· 68 68 dotraplinkage void do_stack_segment(struct pt_regs *, long); 69 69 #ifdef CONFIG_X86_64 70 70 dotraplinkage void do_double_fault(struct pt_regs *, long); 71 - asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *); 71 + asmlinkage struct pt_regs *sync_regs(struct pt_regs *); 72 72 #endif 73 73 dotraplinkage void do_general_protection(struct pt_regs *, long); 74 74 dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); ··· 103 103 104 104 extern int panic_on_unrecovered_nmi; 105 105 106 - void math_error(struct pt_regs *, int, int); 107 106 void math_emulate(struct math_emu_info *); 108 107 #ifndef CONFIG_X86_32 109 108 asmlinkage void smp_thermal_interrupt(void);
+5 -5
arch/x86/include/asm/uprobes.h
··· 41 41 u8 ixol[MAX_UINSN_BYTES]; 42 42 }; 43 43 44 - u16 fixups; 45 44 const struct uprobe_xol_ops *ops; 46 45 47 46 union { 48 - #ifdef CONFIG_X86_64 49 - unsigned long rip_rela_target_address; 50 - #endif 51 47 struct { 52 48 s32 offs; 53 49 u8 ilen; 54 50 u8 opc1; 55 - } branch; 51 + } branch; 52 + struct { 53 + u8 fixups; 54 + u8 ilen; 55 + } defparam; 56 56 }; 57 57 }; 58 58
+1 -2
arch/x86/kernel/alternative.c
··· 5 5 #include <linux/mutex.h> 6 6 #include <linux/list.h> 7 7 #include <linux/stringify.h> 8 - #include <linux/kprobes.h> 9 8 #include <linux/mm.h> 10 9 #include <linux/vmalloc.h> 11 10 #include <linux/memory.h> ··· 550 551 * 551 552 * Note: Must be called under text_mutex. 552 553 */ 553 - void *__kprobes text_poke(void *addr, const void *opcode, size_t len) 554 + void *text_poke(void *addr, const void *opcode, size_t len) 554 555 { 555 556 unsigned long flags; 556 557 char *vaddr;
+2 -1
arch/x86/kernel/apic/hw_nmi.c
··· 60 60 smp_mb__after_atomic(); 61 61 } 62 62 63 - static int __kprobes 63 + static int 64 64 arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) 65 65 { 66 66 int cpu; ··· 80 80 81 81 return NMI_DONE; 82 82 } 83 + NOKPROBE_SYMBOL(arch_trigger_all_cpu_backtrace_handler); 83 84 84 85 static int __init register_trigger_all_cpu_backtrace(void) 85 86 {
+4
arch/x86/kernel/cpu/common.c
··· 8 8 #include <linux/delay.h> 9 9 #include <linux/sched.h> 10 10 #include <linux/init.h> 11 + #include <linux/kprobes.h> 11 12 #include <linux/kgdb.h> 12 13 #include <linux/smp.h> 13 14 #include <linux/io.h> ··· 1194 1193 (addr <= __get_cpu_var(debug_stack_addr) && 1195 1194 addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ)); 1196 1195 } 1196 + NOKPROBE_SYMBOL(is_debug_stack); 1197 1197 1198 1198 DEFINE_PER_CPU(u32, debug_idt_ctr); 1199 1199 ··· 1203 1201 this_cpu_inc(debug_idt_ctr); 1204 1202 load_current_idt(); 1205 1203 } 1204 + NOKPROBE_SYMBOL(debug_stack_set_zero); 1206 1205 1207 1206 void debug_stack_reset(void) 1208 1207 { ··· 1212 1209 if (this_cpu_dec_return(debug_idt_ctr) == 0) 1213 1210 load_current_idt(); 1214 1211 } 1212 + NOKPROBE_SYMBOL(debug_stack_reset); 1215 1213 1216 1214 #else /* CONFIG_X86_64 */ 1217 1215
+11 -10
arch/x86/kernel/cpu/perf_event.c
··· 303 303 hwc->sample_period = x86_pmu.max_period; 304 304 hwc->last_period = hwc->sample_period; 305 305 local64_set(&hwc->period_left, hwc->sample_period); 306 - } else { 307 - /* 308 - * If we have a PMU initialized but no APIC 309 - * interrupts, we cannot sample hardware 310 - * events (user-space has to fall back and 311 - * sample via a hrtimer based software event): 312 - */ 313 - if (!x86_pmu.apic) 314 - return -EOPNOTSUPP; 315 306 } 316 307 317 308 if (attr->type == PERF_TYPE_RAW) ··· 1284 1293 apic_write(APIC_LVTPC, APIC_DM_NMI); 1285 1294 } 1286 1295 1287 - static int __kprobes 1296 + static int 1288 1297 perf_event_nmi_handler(unsigned int cmd, struct pt_regs *regs) 1289 1298 { 1290 1299 u64 start_clock; ··· 1302 1311 1303 1312 return ret; 1304 1313 } 1314 + NOKPROBE_SYMBOL(perf_event_nmi_handler); 1305 1315 1306 1316 struct event_constraint emptyconstraint; 1307 1317 struct event_constraint unconstrained; ··· 1358 1366 x86_pmu.apic = 0; 1359 1367 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); 1360 1368 pr_info("no hardware sampling interrupt available.\n"); 1369 + 1370 + /* 1371 + * If we have a PMU initialized but no APIC 1372 + * interrupts, we cannot sample hardware 1373 + * events (user-space has to fall back and 1374 + * sample via a hrtimer based software event): 1375 + */ 1376 + pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; 1377 + 1361 1378 } 1362 1379 1363 1380 static struct attribute_group x86_pmu_format_group = {
+2 -1
arch/x86/kernel/cpu/perf_event_amd_ibs.c
··· 593 593 return 1; 594 594 } 595 595 596 - static int __kprobes 596 + static int 597 597 perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) 598 598 { 599 599 int handled = 0; ··· 606 606 607 607 return handled; 608 608 } 609 + NOKPROBE_SYMBOL(perf_ibs_nmi_handler); 609 610 610 611 static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name) 611 612 {
+5
arch/x86/kernel/cpu/perf_event_intel_lbr.c
··· 384 384 if (br_type & PERF_SAMPLE_BRANCH_NO_TX) 385 385 mask |= X86_BR_NO_TX; 386 386 387 + if (br_type & PERF_SAMPLE_BRANCH_COND) 388 + mask |= X86_BR_JCC; 389 + 387 390 /* 388 391 * stash actual user request into reg, it may 389 392 * be used by fixup code for some CPU ··· 681 678 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL 682 679 */ 683 680 [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, 681 + [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, 684 682 }; 685 683 686 684 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { ··· 693 689 [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL 694 690 | LBR_FAR, 695 691 [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, 692 + [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, 696 693 }; 697 694 698 695 /* core */
+6 -3
arch/x86/kernel/dumpstack.c
··· 200 200 static int die_owner = -1; 201 201 static unsigned int die_nest_count; 202 202 203 - unsigned __kprobes long oops_begin(void) 203 + unsigned long oops_begin(void) 204 204 { 205 205 int cpu; 206 206 unsigned long flags; ··· 223 223 return flags; 224 224 } 225 225 EXPORT_SYMBOL_GPL(oops_begin); 226 + NOKPROBE_SYMBOL(oops_begin); 226 227 227 - void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) 228 + void oops_end(unsigned long flags, struct pt_regs *regs, int signr) 228 229 { 229 230 if (regs && kexec_should_crash(current)) 230 231 crash_kexec(regs); ··· 248 247 panic("Fatal exception"); 249 248 do_exit(signr); 250 249 } 250 + NOKPROBE_SYMBOL(oops_end); 251 251 252 - int __kprobes __die(const char *str, struct pt_regs *regs, long err) 252 + int __die(const char *str, struct pt_regs *regs, long err) 253 253 { 254 254 #ifdef CONFIG_X86_32 255 255 unsigned short ss; ··· 293 291 #endif 294 292 return 0; 295 293 } 294 + NOKPROBE_SYMBOL(__die); 296 295 297 296 /* 298 297 * This is gone through when something in the kernel has done something bad
-33
arch/x86/kernel/entry_32.S
··· 315 315 ENDPROC(ret_from_kernel_thread) 316 316 317 317 /* 318 - * Interrupt exit functions should be protected against kprobes 319 - */ 320 - .pushsection .kprobes.text, "ax" 321 - /* 322 318 * Return to user mode is not as complex as all this looks, 323 319 * but we want the default path for a system call return to 324 320 * go as quickly as possible which is why some of this is ··· 368 372 END(resume_kernel) 369 373 #endif 370 374 CFI_ENDPROC 371 - /* 372 - * End of kprobes section 373 - */ 374 - .popsection 375 375 376 376 /* SYSENTER_RETURN points to after the "sysenter" instruction in 377 377 the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ ··· 487 495 PTGS_TO_GS_EX 488 496 ENDPROC(ia32_sysenter_target) 489 497 490 - /* 491 - * syscall stub including irq exit should be protected against kprobes 492 - */ 493 - .pushsection .kprobes.text, "ax" 494 498 # system call handler stub 495 499 ENTRY(system_call) 496 500 RING0_INT_FRAME # can't unwind into user space anyway ··· 678 690 jmp resume_userspace 679 691 END(syscall_badsys) 680 692 CFI_ENDPROC 681 - /* 682 - * End of kprobes section 683 - */ 684 - .popsection 685 693 686 694 .macro FIXUP_ESPFIX_STACK 687 695 /* ··· 768 784 ENDPROC(common_interrupt) 769 785 CFI_ENDPROC 770 786 771 - /* 772 - * Irq entries should be protected against kprobes 773 - */ 774 - .pushsection .kprobes.text, "ax" 775 787 #define BUILD_INTERRUPT3(name, nr, fn) \ 776 788 ENTRY(name) \ 777 789 RING0_INT_FRAME; \ ··· 944 964 jmp error_code 945 965 CFI_ENDPROC 946 966 END(spurious_interrupt_bug) 947 - /* 948 - * End of kprobes section 949 - */ 950 - .popsection 951 967 952 968 #ifdef CONFIG_XEN 953 969 /* Xen doesn't set %esp to be precisely what the normal sysenter ··· 1218 1242 jmp *%ecx 1219 1243 #endif 1220 1244 1221 - /* 1222 - * Some functions should be protected against kprobes 1223 - */ 1224 - .pushsection .kprobes.text, "ax" 1225 - 1226 1245 #ifdef CONFIG_TRACING 1227 1246 ENTRY(trace_page_fault) 1228 
1247 RING0_EC_FRAME ··· 1431 1460 END(async_page_fault) 1432 1461 #endif 1433 1462 1434 - /* 1435 - * End of kprobes section 1436 - */ 1437 - .popsection
-21
arch/x86/kernel/entry_64.S
··· 284 284 TRACE_IRQS_OFF 285 285 .endm 286 286 287 - /* save complete stack frame */ 288 - .pushsection .kprobes.text, "ax" 289 287 ENTRY(save_paranoid) 290 288 XCPT_FRAME 1 RDI+8 291 289 cld ··· 312 314 1: ret 313 315 CFI_ENDPROC 314 316 END(save_paranoid) 315 - .popsection 316 317 317 318 /* 318 319 * A newly forked process directly context switches into this address. ··· 769 772 call \func 770 773 .endm 771 774 772 - /* 773 - * Interrupt entry/exit should be protected against kprobes 774 - */ 775 - .pushsection .kprobes.text, "ax" 776 775 /* 777 776 * The interrupt stubs push (~vector+0x80) onto the stack and 778 777 * then jump to common_interrupt. ··· 974 981 #else 975 982 # define __do_double_fault do_double_fault 976 983 #endif 977 - 978 - /* 979 - * End of kprobes section 980 - */ 981 - .popsection 982 984 983 985 /* 984 986 * APIC interrupts. ··· 1308 1320 apicinterrupt3 HYPERVISOR_CALLBACK_VECTOR \ 1309 1321 hyperv_callback_vector hyperv_vector_handler 1310 1322 #endif /* CONFIG_HYPERV */ 1311 - 1312 - /* 1313 - * Some functions should be protected against kprobes 1314 - */ 1315 - .pushsection .kprobes.text, "ax" 1316 1323 1317 1324 idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK 1318 1325 idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK ··· 1725 1742 CFI_ENDPROC 1726 1743 END(ignore_sysret) 1727 1744 1728 - /* 1729 - * End of kprobes section 1730 - */ 1731 - .popsection
+2 -3
arch/x86/kernel/hw_breakpoint.c
··· 32 32 #include <linux/irqflags.h> 33 33 #include <linux/notifier.h> 34 34 #include <linux/kallsyms.h> 35 - #include <linux/kprobes.h> 36 35 #include <linux/percpu.h> 37 36 #include <linux/kdebug.h> 38 37 #include <linux/kernel.h> ··· 423 424 * NOTIFY_STOP returned for all other cases 424 425 * 425 426 */ 426 - static int __kprobes hw_breakpoint_handler(struct die_args *args) 427 + static int hw_breakpoint_handler(struct die_args *args) 427 428 { 428 429 int i, cpu, rc = NOTIFY_STOP; 429 430 struct perf_event *bp; ··· 510 511 /* 511 512 * Handle debug exception notifications. 512 513 */ 513 - int __kprobes hw_breakpoint_exceptions_notify( 514 + int hw_breakpoint_exceptions_notify( 514 515 struct notifier_block *unused, unsigned long val, void *data) 515 516 { 516 517 if (val != DIE_DEBUG)
+70 -58
arch/x86/kernel/kprobes/core.c
··· 112 112 113 113 const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); 114 114 115 - static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) 115 + static nokprobe_inline void 116 + __synthesize_relative_insn(void *from, void *to, u8 op) 116 117 { 117 118 struct __arch_relative_insn { 118 119 u8 op; ··· 126 125 } 127 126 128 127 /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ 129 - void __kprobes synthesize_reljump(void *from, void *to) 128 + void synthesize_reljump(void *from, void *to) 130 129 { 131 130 __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); 132 131 } 132 + NOKPROBE_SYMBOL(synthesize_reljump); 133 133 134 134 /* Insert a call instruction at address 'from', which calls address 'to'.*/ 135 - void __kprobes synthesize_relcall(void *from, void *to) 135 + void synthesize_relcall(void *from, void *to) 136 136 { 137 137 __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); 138 138 } 139 + NOKPROBE_SYMBOL(synthesize_relcall); 139 140 140 141 /* 141 142 * Skip the prefixes of the instruction. 142 143 */ 143 - static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn) 144 + static kprobe_opcode_t *skip_prefixes(kprobe_opcode_t *insn) 144 145 { 145 146 insn_attr_t attr; 146 147 ··· 157 154 #endif 158 155 return insn; 159 156 } 157 + NOKPROBE_SYMBOL(skip_prefixes); 160 158 161 159 /* 162 160 * Returns non-zero if opcode is boostable. 
163 161 * RIP relative instructions are adjusted at copying time in 64 bits mode 164 162 */ 165 - int __kprobes can_boost(kprobe_opcode_t *opcodes) 163 + int can_boost(kprobe_opcode_t *opcodes) 166 164 { 167 165 kprobe_opcode_t opcode; 168 166 kprobe_opcode_t *orig_opcodes = opcodes; ··· 264 260 } 265 261 266 262 /* Check if paddr is at an instruction boundary */ 267 - static int __kprobes can_probe(unsigned long paddr) 263 + static int can_probe(unsigned long paddr) 268 264 { 269 265 unsigned long addr, __addr, offset = 0; 270 266 struct insn insn; ··· 303 299 /* 304 300 * Returns non-zero if opcode modifies the interrupt flag. 305 301 */ 306 - static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) 302 + static int is_IF_modifier(kprobe_opcode_t *insn) 307 303 { 308 304 /* Skip prefixes */ 309 305 insn = skip_prefixes(insn); ··· 326 322 * If not, return null. 327 323 * Only applicable to 64-bit x86. 328 324 */ 329 - int __kprobes __copy_instruction(u8 *dest, u8 *src) 325 + int __copy_instruction(u8 *dest, u8 *src) 330 326 { 331 327 struct insn insn; 332 328 kprobe_opcode_t buf[MAX_INSN_SIZE]; ··· 369 365 return insn.length; 370 366 } 371 367 372 - static int __kprobes arch_copy_kprobe(struct kprobe *p) 368 + static int arch_copy_kprobe(struct kprobe *p) 373 369 { 374 370 int ret; 375 371 ··· 396 392 return 0; 397 393 } 398 394 399 - int __kprobes arch_prepare_kprobe(struct kprobe *p) 395 + int arch_prepare_kprobe(struct kprobe *p) 400 396 { 401 397 if (alternatives_text_reserved(p->addr, p->addr)) 402 398 return -EINVAL; ··· 411 407 return arch_copy_kprobe(p); 412 408 } 413 409 414 - void __kprobes arch_arm_kprobe(struct kprobe *p) 410 + void arch_arm_kprobe(struct kprobe *p) 415 411 { 416 412 text_poke(p->addr, ((unsigned char []){BREAKPOINT_INSTRUCTION}), 1); 417 413 } 418 414 419 - void __kprobes arch_disarm_kprobe(struct kprobe *p) 415 + void arch_disarm_kprobe(struct kprobe *p) 420 416 { 421 417 text_poke(p->addr, &p->opcode, 1); 422 418 } 423 419 424 - 
void __kprobes arch_remove_kprobe(struct kprobe *p) 420 + void arch_remove_kprobe(struct kprobe *p) 425 421 { 426 422 if (p->ainsn.insn) { 427 423 free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); ··· 429 425 } 430 426 } 431 427 432 - static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) 428 + static nokprobe_inline void 429 + save_previous_kprobe(struct kprobe_ctlblk *kcb) 433 430 { 434 431 kcb->prev_kprobe.kp = kprobe_running(); 435 432 kcb->prev_kprobe.status = kcb->kprobe_status; ··· 438 433 kcb->prev_kprobe.saved_flags = kcb->kprobe_saved_flags; 439 434 } 440 435 441 - static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) 436 + static nokprobe_inline void 437 + restore_previous_kprobe(struct kprobe_ctlblk *kcb) 442 438 { 443 439 __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); 444 440 kcb->kprobe_status = kcb->prev_kprobe.status; ··· 447 441 kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags; 448 442 } 449 443 450 - static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, 451 - struct kprobe_ctlblk *kcb) 444 + static nokprobe_inline void 445 + set_current_kprobe(struct kprobe *p, struct pt_regs *regs, 446 + struct kprobe_ctlblk *kcb) 452 447 { 453 448 __this_cpu_write(current_kprobe, p); 454 449 kcb->kprobe_saved_flags = kcb->kprobe_old_flags ··· 458 451 kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF; 459 452 } 460 453 461 - static void __kprobes clear_btf(void) 454 + static nokprobe_inline void clear_btf(void) 462 455 { 463 456 if (test_thread_flag(TIF_BLOCKSTEP)) { 464 457 unsigned long debugctl = get_debugctlmsr(); ··· 468 461 } 469 462 } 470 463 471 - static void __kprobes restore_btf(void) 464 + static nokprobe_inline void restore_btf(void) 472 465 { 473 466 if (test_thread_flag(TIF_BLOCKSTEP)) { 474 467 unsigned long debugctl = get_debugctlmsr(); ··· 478 471 } 479 472 } 480 473 481 - void __kprobes 482 - arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) 474 
+ void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) 483 475 { 484 476 unsigned long *sara = stack_addr(regs); 485 477 ··· 487 481 /* Replace the return addr with trampoline addr */ 488 482 *sara = (unsigned long) &kretprobe_trampoline; 489 483 } 484 + NOKPROBE_SYMBOL(arch_prepare_kretprobe); 490 485 491 - static void __kprobes 492 - setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter) 486 + static void setup_singlestep(struct kprobe *p, struct pt_regs *regs, 487 + struct kprobe_ctlblk *kcb, int reenter) 493 488 { 494 489 if (setup_detour_execution(p, regs, reenter)) 495 490 return; ··· 526 519 else 527 520 regs->ip = (unsigned long)p->ainsn.insn; 528 521 } 522 + NOKPROBE_SYMBOL(setup_singlestep); 529 523 530 524 /* 531 525 * We have reentered the kprobe_handler(), since another probe was hit while 532 526 * within the handler. We save the original kprobes variables and just single 533 527 * step on the instruction of the new probe without calling any user handlers. 534 528 */ 535 - static int __kprobes 536 - reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) 529 + static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs, 530 + struct kprobe_ctlblk *kcb) 537 531 { 538 532 switch (kcb->kprobe_status) { 539 533 case KPROBE_HIT_SSDONE: 540 534 case KPROBE_HIT_ACTIVE: 535 + case KPROBE_HIT_SS: 541 536 kprobes_inc_nmissed_count(p); 542 537 setup_singlestep(p, regs, kcb, 1); 543 538 break; 544 - case KPROBE_HIT_SS: 539 + case KPROBE_REENTER: 545 540 /* A probe has been hit in the codepath leading up to, or just 546 541 * after, single-stepping of a probed instruction. This entire 547 542 * codepath should strictly reside in .kprobes.text section. 
··· 562 553 563 554 return 1; 564 555 } 556 + NOKPROBE_SYMBOL(reenter_kprobe); 565 557 566 558 /* 567 559 * Interrupts are disabled on entry as trap3 is an interrupt gate and they 568 560 * remain disabled throughout this function. 569 561 */ 570 - static int __kprobes kprobe_handler(struct pt_regs *regs) 562 + int kprobe_int3_handler(struct pt_regs *regs) 571 563 { 572 564 kprobe_opcode_t *addr; 573 565 struct kprobe *p; ··· 631 621 preempt_enable_no_resched(); 632 622 return 0; 633 623 } 624 + NOKPROBE_SYMBOL(kprobe_int3_handler); 634 625 635 626 /* 636 627 * When a retprobed function returns, this code saves registers and 637 628 * calls trampoline_handler() runs, which calls the kretprobe's handler. 638 629 */ 639 - static void __used __kprobes kretprobe_trampoline_holder(void) 630 + static void __used kretprobe_trampoline_holder(void) 640 631 { 641 632 asm volatile ( 642 633 ".global kretprobe_trampoline\n" ··· 668 657 #endif 669 658 " ret\n"); 670 659 } 660 + NOKPROBE_SYMBOL(kretprobe_trampoline_holder); 661 + NOKPROBE_SYMBOL(kretprobe_trampoline); 671 662 672 663 /* 673 664 * Called from kretprobe_trampoline 674 665 */ 675 - __visible __used __kprobes void *trampoline_handler(struct pt_regs *regs) 666 + __visible __used void *trampoline_handler(struct pt_regs *regs) 676 667 { 677 668 struct kretprobe_instance *ri = NULL; 678 669 struct hlist_head *head, empty_rp; ··· 760 747 } 761 748 return (void *)orig_ret_address; 762 749 } 750 + NOKPROBE_SYMBOL(trampoline_handler); 763 751 764 752 /* 765 753 * Called after single-stepping. p->addr is the address of the ··· 789 775 * jump instruction after the copied instruction, that jumps to the next 790 776 * instruction after the probepoint. 
791 777 */ 792 - static void __kprobes 793 - resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) 778 + static void resume_execution(struct kprobe *p, struct pt_regs *regs, 779 + struct kprobe_ctlblk *kcb) 794 780 { 795 781 unsigned long *tos = stack_addr(regs); 796 782 unsigned long copy_ip = (unsigned long)p->ainsn.insn; ··· 865 851 no_change: 866 852 restore_btf(); 867 853 } 854 + NOKPROBE_SYMBOL(resume_execution); 868 855 869 856 /* 870 857 * Interrupts are disabled on entry as trap1 is an interrupt gate and they 871 858 * remain disabled throughout this function. 872 859 */ 873 - static int __kprobes post_kprobe_handler(struct pt_regs *regs) 860 + int kprobe_debug_handler(struct pt_regs *regs) 874 861 { 875 862 struct kprobe *cur = kprobe_running(); 876 863 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); ··· 906 891 907 892 return 1; 908 893 } 894 + NOKPROBE_SYMBOL(kprobe_debug_handler); 909 895 910 - int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) 896 + int kprobe_fault_handler(struct pt_regs *regs, int trapnr) 911 897 { 912 898 struct kprobe *cur = kprobe_running(); 913 899 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); ··· 965 949 966 950 return 0; 967 951 } 952 + NOKPROBE_SYMBOL(kprobe_fault_handler); 968 953 969 954 /* 970 955 * Wrapper routine for handling exceptions. 
971 956 */ 972 - int __kprobes 973 - kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) 957 + int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, 958 + void *data) 974 959 { 975 960 struct die_args *args = data; 976 961 int ret = NOTIFY_DONE; ··· 979 962 if (args->regs && user_mode_vm(args->regs)) 980 963 return ret; 981 964 982 - switch (val) { 983 - case DIE_INT3: 984 - if (kprobe_handler(args->regs)) 985 - ret = NOTIFY_STOP; 986 - break; 987 - case DIE_DEBUG: 988 - if (post_kprobe_handler(args->regs)) { 989 - /* 990 - * Reset the BS bit in dr6 (pointed by args->err) to 991 - * denote completion of processing 992 - */ 993 - (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; 994 - ret = NOTIFY_STOP; 995 - } 996 - break; 997 - case DIE_GPF: 965 + if (val == DIE_GPF) { 998 966 /* 999 967 * To be potentially processing a kprobe fault and to 1000 968 * trust the result from kprobe_running(), we have ··· 988 986 if (!preemptible() && kprobe_running() && 989 987 kprobe_fault_handler(args->regs, args->trapnr)) 990 988 ret = NOTIFY_STOP; 991 - break; 992 - default: 993 - break; 994 989 } 995 990 return ret; 996 991 } 992 + NOKPROBE_SYMBOL(kprobe_exceptions_notify); 997 993 998 - int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) 994 + int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) 999 995 { 1000 996 struct jprobe *jp = container_of(p, struct jprobe, kp); 1001 997 unsigned long addr; ··· 1017 1017 regs->ip = (unsigned long)(jp->entry); 1018 1018 return 1; 1019 1019 } 1020 + NOKPROBE_SYMBOL(setjmp_pre_handler); 1020 1021 1021 - void __kprobes jprobe_return(void) 1022 + void jprobe_return(void) 1022 1023 { 1023 1024 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1024 1025 ··· 1035 1034 " nop \n"::"b" 1036 1035 (kcb->jprobe_saved_sp):"memory"); 1037 1036 } 1037 + NOKPROBE_SYMBOL(jprobe_return); 1038 + NOKPROBE_SYMBOL(jprobe_return_end); 1038 1039 1039 - int __kprobes 
longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) 1040 + int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) 1040 1041 { 1041 1042 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 1042 1043 u8 *addr = (u8 *) (regs->ip - 1); ··· 1066 1063 } 1067 1064 return 0; 1068 1065 } 1066 + NOKPROBE_SYMBOL(longjmp_break_handler); 1067 + 1068 + bool arch_within_kprobe_blacklist(unsigned long addr) 1069 + { 1070 + return (addr >= (unsigned long)__kprobes_text_start && 1071 + addr < (unsigned long)__kprobes_text_end) || 1072 + (addr >= (unsigned long)__entry_text_start && 1073 + addr < (unsigned long)__entry_text_end); 1074 + } 1069 1075 1070 1076 int __init arch_init_kprobes(void) 1071 1077 { 1072 1078 return 0; 1073 1079 } 1074 1080 1075 - int __kprobes arch_trampoline_kprobe(struct kprobe *p) 1081 + int arch_trampoline_kprobe(struct kprobe *p) 1076 1082 { 1077 1083 return 0; 1078 1084 }
+10 -7
arch/x86/kernel/kprobes/ftrace.c
··· 25 25 26 26 #include "common.h" 27 27 28 - static int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, 29 - struct kprobe_ctlblk *kcb) 28 + static nokprobe_inline 29 + int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, 30 + struct kprobe_ctlblk *kcb) 30 31 { 31 32 /* 32 33 * Emulate singlestep (and also recover regs->ip) ··· 42 41 return 1; 43 42 } 44 43 45 - int __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs, 46 - struct kprobe_ctlblk *kcb) 44 + int skip_singlestep(struct kprobe *p, struct pt_regs *regs, 45 + struct kprobe_ctlblk *kcb) 47 46 { 48 47 if (kprobe_ftrace(p)) 49 48 return __skip_singlestep(p, regs, kcb); 50 49 else 51 50 return 0; 52 51 } 52 + NOKPROBE_SYMBOL(skip_singlestep); 53 53 54 54 /* Ftrace callback handler for kprobes */ 55 - void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, 56 - struct ftrace_ops *ops, struct pt_regs *regs) 55 + void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, 56 + struct ftrace_ops *ops, struct pt_regs *regs) 57 57 { 58 58 struct kprobe *p; 59 59 struct kprobe_ctlblk *kcb; ··· 86 84 end: 87 85 local_irq_restore(flags); 88 86 } 87 + NOKPROBE_SYMBOL(kprobe_ftrace_handler); 89 88 90 - int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p) 89 + int arch_prepare_kprobe_ftrace(struct kprobe *p) 91 90 { 92 91 p->ainsn.insn = NULL; 93 92 p->ainsn.boostable = -1;
+17 -15
arch/x86/kernel/kprobes/opt.c
··· 77 77 } 78 78 79 79 /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ 80 - static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) 80 + static void synthesize_set_arg1(kprobe_opcode_t *addr, unsigned long val) 81 81 { 82 82 #ifdef CONFIG_X86_64 83 83 *addr++ = 0x48; ··· 138 138 #define INT3_SIZE sizeof(kprobe_opcode_t) 139 139 140 140 /* Optimized kprobe call back function: called from optinsn */ 141 - static void __kprobes optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) 141 + static void 142 + optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) 142 143 { 143 144 struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); 144 145 unsigned long flags; ··· 169 168 } 170 169 local_irq_restore(flags); 171 170 } 171 + NOKPROBE_SYMBOL(optimized_callback); 172 172 173 - static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) 173 + static int copy_optimized_instructions(u8 *dest, u8 *src) 174 174 { 175 175 int len = 0, ret; 176 176 ··· 191 189 } 192 190 193 191 /* Check whether insn is indirect jump */ 194 - static int __kprobes insn_is_indirect_jump(struct insn *insn) 192 + static int insn_is_indirect_jump(struct insn *insn) 195 193 { 196 194 return ((insn->opcode.bytes[0] == 0xff && 197 195 (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ ··· 226 224 } 227 225 228 226 /* Decode whole function to ensure any instructions don't jump into target */ 229 - static int __kprobes can_optimize(unsigned long paddr) 227 + static int can_optimize(unsigned long paddr) 230 228 { 231 229 unsigned long addr, size = 0, offset = 0; 232 230 struct insn insn; ··· 277 275 } 278 276 279 277 /* Check optimized_kprobe can actually be optimized. 
*/ 280 - int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) 278 + int arch_check_optimized_kprobe(struct optimized_kprobe *op) 281 279 { 282 280 int i; 283 281 struct kprobe *p; ··· 292 290 } 293 291 294 292 /* Check the addr is within the optimized instructions. */ 295 - int __kprobes 296 - arch_within_optimized_kprobe(struct optimized_kprobe *op, unsigned long addr) 293 + int arch_within_optimized_kprobe(struct optimized_kprobe *op, 294 + unsigned long addr) 297 295 { 298 296 return ((unsigned long)op->kp.addr <= addr && 299 297 (unsigned long)op->kp.addr + op->optinsn.size > addr); 300 298 } 301 299 302 300 /* Free optimized instruction slot */ 303 - static __kprobes 301 + static 304 302 void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) 305 303 { 306 304 if (op->optinsn.insn) { ··· 310 308 } 311 309 } 312 310 313 - void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) 311 + void arch_remove_optimized_kprobe(struct optimized_kprobe *op) 314 312 { 315 313 __arch_remove_optimized_kprobe(op, 1); 316 314 } ··· 320 318 * Target instructions MUST be relocatable (checked inside) 321 319 * This is called when new aggr(opt)probe is allocated or reused. 322 320 */ 323 - int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) 321 + int arch_prepare_optimized_kprobe(struct optimized_kprobe *op) 324 322 { 325 323 u8 *buf; 326 324 int ret; ··· 374 372 * Replace breakpoints (int3) with relative jumps. 375 373 * Caller must call with locking kprobe_mutex and text_mutex. 376 374 */ 377 - void __kprobes arch_optimize_kprobes(struct list_head *oplist) 375 + void arch_optimize_kprobes(struct list_head *oplist) 378 376 { 379 377 struct optimized_kprobe *op, *tmp; 380 378 u8 insn_buf[RELATIVEJUMP_SIZE]; ··· 400 398 } 401 399 402 400 /* Replace a relative jump with a breakpoint (int3). 
*/ 403 - void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) 401 + void arch_unoptimize_kprobe(struct optimized_kprobe *op) 404 402 { 405 403 u8 insn_buf[RELATIVEJUMP_SIZE]; 406 404 ··· 426 424 } 427 425 } 428 426 429 - int __kprobes 430 - setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) 427 + int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter) 431 428 { 432 429 struct optimized_kprobe *op; 433 430 ··· 442 441 } 443 442 return 0; 444 443 } 444 + NOKPROBE_SYMBOL(setup_detour_execution);
+3 -1
arch/x86/kernel/kvm.c
··· 251 251 return reason; 252 252 } 253 253 EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); 254 + NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason); 254 255 255 - dotraplinkage void __kprobes 256 + dotraplinkage void 256 257 do_async_page_fault(struct pt_regs *regs, unsigned long error_code) 257 258 { 258 259 enum ctx_state prev_state; ··· 277 276 break; 278 277 } 279 278 } 279 + NOKPROBE_SYMBOL(do_async_page_fault); 280 280 281 281 static void __init paravirt_ops_setup(void) 282 282 {
+12 -6
arch/x86/kernel/nmi.c
··· 110 110 a->handler, whole_msecs, decimal_msecs); 111 111 } 112 112 113 - static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) 113 + static int nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) 114 114 { 115 115 struct nmi_desc *desc = nmi_to_desc(type); 116 116 struct nmiaction *a; ··· 146 146 /* return total number of NMI events handled */ 147 147 return handled; 148 148 } 149 + NOKPROBE_SYMBOL(nmi_handle); 149 150 150 151 int __register_nmi_handler(unsigned int type, struct nmiaction *action) 151 152 { ··· 209 208 } 210 209 EXPORT_SYMBOL_GPL(unregister_nmi_handler); 211 210 212 - static __kprobes void 211 + static void 213 212 pci_serr_error(unsigned char reason, struct pt_regs *regs) 214 213 { 215 214 /* check to see if anyone registered against these types of errors */ ··· 239 238 reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; 240 239 outb(reason, NMI_REASON_PORT); 241 240 } 241 + NOKPROBE_SYMBOL(pci_serr_error); 242 242 243 - static __kprobes void 243 + static void 244 244 io_check_error(unsigned char reason, struct pt_regs *regs) 245 245 { 246 246 unsigned long i; ··· 271 269 reason &= ~NMI_REASON_CLEAR_IOCHK; 272 270 outb(reason, NMI_REASON_PORT); 273 271 } 272 + NOKPROBE_SYMBOL(io_check_error); 274 273 275 - static __kprobes void 274 + static void 276 275 unknown_nmi_error(unsigned char reason, struct pt_regs *regs) 277 276 { 278 277 int handled; ··· 301 298 302 299 pr_emerg("Dazed and confused, but trying to continue\n"); 303 300 } 301 + NOKPROBE_SYMBOL(unknown_nmi_error); 304 302 305 303 static DEFINE_PER_CPU(bool, swallow_nmi); 306 304 static DEFINE_PER_CPU(unsigned long, last_nmi_rip); 307 305 308 - static __kprobes void default_do_nmi(struct pt_regs *regs) 306 + static void default_do_nmi(struct pt_regs *regs) 309 307 { 310 308 unsigned char reason = 0; 311 309 int handled; ··· 405 401 else 406 402 unknown_nmi_error(reason, regs); 407 403 } 404 + NOKPROBE_SYMBOL(default_do_nmi); 408 405 
409 406 /* 410 407 * NMIs can hit breakpoints which will cause it to lose its ··· 525 520 } 526 521 #endif 527 522 528 - dotraplinkage notrace __kprobes void 523 + dotraplinkage notrace void 529 524 do_nmi(struct pt_regs *regs, long error_code) 530 525 { 531 526 nmi_nesting_preprocess(regs); ··· 542 537 /* On i386, may loop back to preprocess */ 543 538 nmi_nesting_postprocess(); 544 539 } 540 + NOKPROBE_SYMBOL(do_nmi); 545 541 546 542 void stop_nmi(void) 547 543 {
+6
arch/x86/kernel/paravirt.c
··· 23 23 #include <linux/efi.h> 24 24 #include <linux/bcd.h> 25 25 #include <linux/highmem.h> 26 + #include <linux/kprobes.h> 26 27 27 28 #include <asm/bug.h> 28 29 #include <asm/paravirt.h> ··· 389 388 .start_context_switch = paravirt_nop, 390 389 .end_context_switch = paravirt_nop, 391 390 }; 391 + 392 + /* At this point, native_get/set_debugreg has real function entries */ 393 + NOKPROBE_SYMBOL(native_get_debugreg); 394 + NOKPROBE_SYMBOL(native_set_debugreg); 395 + NOKPROBE_SYMBOL(native_load_idt); 392 396 393 397 struct pv_apic_ops pv_apic_ops = { 394 398 #ifdef CONFIG_X86_LOCAL_APIC
+4 -3
arch/x86/kernel/process_64.c
··· 413 413 set_thread_flag(TIF_ADDR32); 414 414 415 415 /* Mark the associated mm as containing 32-bit tasks. */ 416 - if (current->mm) 417 - current->mm->context.ia32_compat = 1; 418 - 419 416 if (x32) { 420 417 clear_thread_flag(TIF_IA32); 421 418 set_thread_flag(TIF_X32); 419 + if (current->mm) 420 + current->mm->context.ia32_compat = TIF_X32; 422 421 current->personality &= ~READ_IMPLIES_EXEC; 423 422 /* is_compat_task() uses the presence of the x32 424 423 syscall bit flag to determine compat status */ ··· 425 426 } else { 426 427 set_thread_flag(TIF_IA32); 427 428 clear_thread_flag(TIF_X32); 429 + if (current->mm) 430 + current->mm->context.ia32_compat = TIF_IA32; 428 431 current->personality |= force_personality32; 429 432 /* Prepare the first "return" to user space */ 430 433 current_thread_info()->status |= TS_COMPAT;
+88 -59
arch/x86/kernel/traps.c
··· 23 23 #include <linux/kernel.h> 24 24 #include <linux/module.h> 25 25 #include <linux/ptrace.h> 26 + #include <linux/uprobes.h> 26 27 #include <linux/string.h> 27 28 #include <linux/delay.h> 28 29 #include <linux/errno.h> ··· 107 106 preempt_count_dec(); 108 107 } 109 108 110 - static int __kprobes 109 + static nokprobe_inline int 111 110 do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, 112 111 struct pt_regs *regs, long error_code) 113 112 { ··· 137 136 return -1; 138 137 } 139 138 140 - static void __kprobes 139 + static siginfo_t *fill_trap_info(struct pt_regs *regs, int signr, int trapnr, 140 + siginfo_t *info) 141 + { 142 + unsigned long siaddr; 143 + int sicode; 144 + 145 + switch (trapnr) { 146 + default: 147 + return SEND_SIG_PRIV; 148 + 149 + case X86_TRAP_DE: 150 + sicode = FPE_INTDIV; 151 + siaddr = uprobe_get_trap_addr(regs); 152 + break; 153 + case X86_TRAP_UD: 154 + sicode = ILL_ILLOPN; 155 + siaddr = uprobe_get_trap_addr(regs); 156 + break; 157 + case X86_TRAP_AC: 158 + sicode = BUS_ADRALN; 159 + siaddr = 0; 160 + break; 161 + } 162 + 163 + info->si_signo = signr; 164 + info->si_errno = 0; 165 + info->si_code = sicode; 166 + info->si_addr = (void __user *)siaddr; 167 + return info; 168 + } 169 + 170 + static void 141 171 do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, 142 172 long error_code, siginfo_t *info) 143 173 { ··· 200 168 } 201 169 #endif 202 170 203 - if (info) 204 - force_sig_info(signr, info, tsk); 205 - else 206 - force_sig(signr, tsk); 171 + force_sig_info(signr, info ?: SEND_SIG_PRIV, tsk); 172 + } 173 + NOKPROBE_SYMBOL(do_trap); 174 + 175 + static void do_error_trap(struct pt_regs *regs, long error_code, char *str, 176 + unsigned long trapnr, int signr) 177 + { 178 + enum ctx_state prev_state = exception_enter(); 179 + siginfo_t info; 180 + 181 + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != 182 + NOTIFY_STOP) { 183 + conditional_sti(regs); 184 + do_trap(trapnr, signr, str, 
regs, error_code, 185 + fill_trap_info(regs, signr, trapnr, &info)); 186 + } 187 + 188 + exception_exit(prev_state); 207 189 } 208 190 209 191 #define DO_ERROR(trapnr, signr, str, name) \ 210 192 dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ 211 193 { \ 212 - enum ctx_state prev_state; \ 213 - \ 214 - prev_state = exception_enter(); \ 215 - if (notify_die(DIE_TRAP, str, regs, error_code, \ 216 - trapnr, signr) == NOTIFY_STOP) { \ 217 - exception_exit(prev_state); \ 218 - return; \ 219 - } \ 220 - conditional_sti(regs); \ 221 - do_trap(trapnr, signr, str, regs, error_code, NULL); \ 222 - exception_exit(prev_state); \ 194 + do_error_trap(regs, error_code, str, trapnr, signr); \ 223 195 } 224 196 225 - #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ 226 - dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ 227 - { \ 228 - siginfo_t info; \ 229 - enum ctx_state prev_state; \ 230 - \ 231 - info.si_signo = signr; \ 232 - info.si_errno = 0; \ 233 - info.si_code = sicode; \ 234 - info.si_addr = (void __user *)siaddr; \ 235 - prev_state = exception_enter(); \ 236 - if (notify_die(DIE_TRAP, str, regs, error_code, \ 237 - trapnr, signr) == NOTIFY_STOP) { \ 238 - exception_exit(prev_state); \ 239 - return; \ 240 - } \ 241 - conditional_sti(regs); \ 242 - do_trap(trapnr, signr, str, regs, error_code, &info); \ 243 - exception_exit(prev_state); \ 244 - } 245 - 246 - DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip ) 247 - DO_ERROR (X86_TRAP_OF, SIGSEGV, "overflow", overflow ) 248 - DO_ERROR (X86_TRAP_BR, SIGSEGV, "bounds", bounds ) 249 - DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip ) 250 - DO_ERROR (X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun ) 251 - DO_ERROR (X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS ) 252 - DO_ERROR (X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present ) 197 + 
DO_ERROR(X86_TRAP_DE, SIGFPE, "divide error", divide_error) 198 + DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) 199 + DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds) 200 + DO_ERROR(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op) 201 + DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun",coprocessor_segment_overrun) 202 + DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) 203 + DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) 253 204 #ifdef CONFIG_X86_32 254 - DO_ERROR (X86_TRAP_SS, SIGBUS, "stack segment", stack_segment ) 205 + DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) 255 206 #endif 256 - DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0 ) 207 + DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check) 257 208 258 209 #ifdef CONFIG_X86_64 259 210 /* Runs on IST stack */ ··· 278 263 } 279 264 #endif 280 265 281 - dotraplinkage void __kprobes 266 + dotraplinkage void 282 267 do_general_protection(struct pt_regs *regs, long error_code) 283 268 { 284 269 struct task_struct *tsk; ··· 320 305 pr_cont("\n"); 321 306 } 322 307 323 - force_sig(SIGSEGV, tsk); 308 + force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); 324 309 exit: 325 310 exception_exit(prev_state); 326 311 } 312 + NOKPROBE_SYMBOL(do_general_protection); 327 313 328 314 /* May run on IST stack. 
*/ 329 - dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code) 315 + dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) 330 316 { 331 317 enum ctx_state prev_state; 332 318 ··· 343 327 if (poke_int3_handler(regs)) 344 328 return; 345 329 346 - prev_state = exception_enter(); 347 330 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP 348 331 if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 349 332 SIGTRAP) == NOTIFY_STOP) 350 333 goto exit; 351 334 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ 335 + 336 + #ifdef CONFIG_KPROBES 337 + if (kprobe_int3_handler(regs)) 338 + return; 339 + #endif 340 + prev_state = exception_enter(); 352 341 353 342 if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, 354 343 SIGTRAP) == NOTIFY_STOP) ··· 371 350 exit: 372 351 exception_exit(prev_state); 373 352 } 353 + NOKPROBE_SYMBOL(do_int3); 374 354 375 355 #ifdef CONFIG_X86_64 376 356 /* ··· 379 357 * for scheduling or signal handling. The actual stack switch is done in 380 358 * entry.S 381 359 */ 382 - asmlinkage __visible __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) 360 + asmlinkage __visible struct pt_regs *sync_regs(struct pt_regs *eregs) 383 361 { 384 362 struct pt_regs *regs = eregs; 385 363 /* Did already sync */ ··· 398 376 *regs = *eregs; 399 377 return regs; 400 378 } 379 + NOKPROBE_SYMBOL(sync_regs); 401 380 #endif 402 381 403 382 /* ··· 425 402 * 426 403 * May run on IST stack. 
427 404 */ 428 - dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) 405 + dotraplinkage void do_debug(struct pt_regs *regs, long error_code) 429 406 { 430 407 struct task_struct *tsk = current; 431 408 enum ctx_state prev_state; 432 409 int user_icebp = 0; 433 410 unsigned long dr6; 434 411 int si_code; 435 - 436 - prev_state = exception_enter(); 437 412 438 413 get_debugreg(dr6, 6); 439 414 ··· 460 439 461 440 /* Store the virtualized DR6 value */ 462 441 tsk->thread.debugreg6 = dr6; 442 + 443 + #ifdef CONFIG_KPROBES 444 + if (kprobe_debug_handler(regs)) 445 + goto exit; 446 + #endif 447 + prev_state = exception_enter(); 463 448 464 449 if (notify_die(DIE_DEBUG, "debug", regs, (long)&dr6, error_code, 465 450 SIGTRAP) == NOTIFY_STOP) ··· 509 482 exit: 510 483 exception_exit(prev_state); 511 484 } 485 + NOKPROBE_SYMBOL(do_debug); 512 486 513 487 /* 514 488 * Note that we play around with the 'TS' bit in an attempt to get 515 489 * the correct behaviour even in the presence of the asynchronous 516 490 * IRQ13 behaviour 517 491 */ 518 - void math_error(struct pt_regs *regs, int error_code, int trapnr) 492 + static void math_error(struct pt_regs *regs, int error_code, int trapnr) 519 493 { 520 494 struct task_struct *task = current; 521 495 siginfo_t info; ··· 546 518 task->thread.error_code = error_code; 547 519 info.si_signo = SIGFPE; 548 520 info.si_errno = 0; 549 - info.si_addr = (void __user *)regs->ip; 521 + info.si_addr = (void __user *)uprobe_get_trap_addr(regs); 550 522 if (trapnr == X86_TRAP_MF) { 551 523 unsigned short cwd, swd; 552 524 /* ··· 673 645 */ 674 646 if (unlikely(restore_fpu_checking(tsk))) { 675 647 drop_init_fpu(tsk); 676 - force_sig(SIGSEGV, tsk); 648 + force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); 677 649 return; 678 650 } 679 651 ··· 681 653 } 682 654 EXPORT_SYMBOL_GPL(math_state_restore); 683 655 684 - dotraplinkage void __kprobes 656 + dotraplinkage void 685 657 do_device_not_available(struct pt_regs *regs, long 
error_code) 686 658 { 687 659 enum ctx_state prev_state; ··· 707 679 #endif 708 680 exception_exit(prev_state); 709 681 } 682 + NOKPROBE_SYMBOL(do_device_not_available); 710 683 711 684 #ifdef CONFIG_X86_32 712 685 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
+289 -236
arch/x86/kernel/uprobes.c
··· 32 32 33 33 /* Post-execution fixups. */ 34 34 35 - /* No fixup needed */ 36 - #define UPROBE_FIX_NONE 0x0 37 - 38 35 /* Adjust IP back to vicinity of actual insn */ 39 - #define UPROBE_FIX_IP 0x1 36 + #define UPROBE_FIX_IP 0x01 40 37 41 38 /* Adjust the return address of a call insn */ 42 - #define UPROBE_FIX_CALL 0x2 39 + #define UPROBE_FIX_CALL 0x02 43 40 44 41 /* Instruction will modify TF, don't change it */ 45 - #define UPROBE_FIX_SETF 0x4 42 + #define UPROBE_FIX_SETF 0x04 46 43 47 - #define UPROBE_FIX_RIP_AX 0x8000 48 - #define UPROBE_FIX_RIP_CX 0x4000 44 + #define UPROBE_FIX_RIP_SI 0x08 45 + #define UPROBE_FIX_RIP_DI 0x10 46 + #define UPROBE_FIX_RIP_BX 0x20 47 + #define UPROBE_FIX_RIP_MASK \ 48 + (UPROBE_FIX_RIP_SI | UPROBE_FIX_RIP_DI | UPROBE_FIX_RIP_BX) 49 49 50 50 #define UPROBE_TRAP_NR UINT_MAX 51 51 ··· 67 67 * to keep gcc from statically optimizing it out, as variable_test_bit makes 68 68 * some versions of gcc to think only *(unsigned long*) is used. 69 69 */ 70 + #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) 70 71 static volatile u32 good_insns_32[256 / 32] = { 71 72 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 72 73 /* ---------------------------------------------- */ ··· 90 89 /* ---------------------------------------------- */ 91 90 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 92 91 }; 92 + #else 93 + #define good_insns_32 NULL 94 + #endif 95 + 96 + /* Good-instruction tables for 64-bit apps */ 97 + #if defined(CONFIG_X86_64) 98 + static volatile u32 good_insns_64[256 / 32] = { 99 + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 100 + /* ---------------------------------------------- */ 101 + W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ 102 + W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ 103 + W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */ 104 + W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */ 105 + W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ 106 
+ W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ 107 + W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ 108 + W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ 109 + W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ 110 + W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ 111 + W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ 112 + W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ 113 + W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ 114 + W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ 115 + W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ 116 + W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ 117 + /* ---------------------------------------------- */ 118 + /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 119 + }; 120 + #else 121 + #define good_insns_64 NULL 122 + #endif 93 123 94 124 /* Using this for both 64-bit and 32-bit apps */ 95 125 static volatile u32 good_2byte_insns[256 / 32] = { ··· 145 113 /* ---------------------------------------------- */ 146 114 /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 147 115 }; 148 - 149 - #ifdef CONFIG_X86_64 150 - /* Good-instruction tables for 64-bit apps */ 151 - static volatile u32 good_insns_64[256 / 32] = { 152 - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 153 - /* ---------------------------------------------- */ 154 - W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */ 155 - W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */ 156 - W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */ 157 - W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */ 158 - W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ 159 - W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */ 160 - W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */ 161 - W(0x70, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */ 162 - W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ 163 - W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */ 164 - W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */ 165 - W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */ 166 - W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */ 167 - W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ 168 - W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */ 169 - W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */ 170 - /* ---------------------------------------------- */ 171 - /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ 172 - }; 173 - #endif 174 116 #undef W 175 117 176 118 /* ··· 215 209 return false; 216 210 } 217 211 218 - static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn) 212 + static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool x86_64) 219 213 { 220 - insn_init(insn, auprobe->insn, false); 214 + u32 volatile *good_insns; 221 215 222 - /* Skip good instruction prefixes; reject "bad" ones. 
*/ 223 - insn_get_opcode(insn); 216 + insn_init(insn, auprobe->insn, x86_64); 217 + /* has the side-effect of processing the entire instruction */ 218 + insn_get_length(insn); 219 + if (WARN_ON_ONCE(!insn_complete(insn))) 220 + return -ENOEXEC; 221 + 224 222 if (is_prefix_bad(insn)) 225 223 return -ENOTSUPP; 226 224 227 - if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32)) 225 + if (x86_64) 226 + good_insns = good_insns_64; 227 + else 228 + good_insns = good_insns_32; 229 + 230 + if (test_bit(OPCODE1(insn), (unsigned long *)good_insns)) 228 231 return 0; 229 232 230 233 if (insn->opcode.nbytes == 2) { ··· 245 230 } 246 231 247 232 #ifdef CONFIG_X86_64 233 + static inline bool is_64bit_mm(struct mm_struct *mm) 234 + { 235 + return !config_enabled(CONFIG_IA32_EMULATION) || 236 + !(mm->context.ia32_compat == TIF_IA32); 237 + } 248 238 /* 249 239 * If arch_uprobe->insn doesn't use rip-relative addressing, return 250 240 * immediately. Otherwise, rewrite the instruction so that it accesses 251 241 * its memory operand indirectly through a scratch register. Set 252 - * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address 253 - * accordingly. (The contents of the scratch register will be saved 254 - * before we single-step the modified instruction, and restored 255 - * afterward.) 242 + * defparam->fixups accordingly. (The contents of the scratch register 243 + * will be saved before we single-step the modified instruction, 244 + * and restored afterward). 256 245 * 257 246 * We do this because a rip-relative instruction can access only a 258 247 * relatively small area (+/- 2 GB from the instruction), and the XOL ··· 267 248 * 268 249 * Some useful facts about rip-relative instructions: 269 250 * 270 - * - There's always a modrm byte. 251 + * - There's always a modrm byte with bit layout "00 reg 101". 271 252 * - There's never a SIB byte. 272 253 * - The displacement is always 4 bytes. 
254 + * - REX.B=1 bit in REX prefix, which normally extends r/m field, 255 + * has no effect on rip-relative mode. It doesn't make modrm byte 256 + * with r/m=101 refer to register 1101 = R13. 273 257 */ 274 - static void 275 - handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) 258 + static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) 276 259 { 277 260 u8 *cursor; 278 261 u8 reg; 262 + u8 reg2; 279 263 280 264 if (!insn_rip_relative(insn)) 281 265 return; 282 266 283 267 /* 284 - * insn_rip_relative() would have decoded rex_prefix, modrm. 268 + * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm. 285 269 * Clear REX.b bit (extension of MODRM.rm field): 286 - * we want to encode rax/rcx, not r8/r9. 270 + * we want to encode low numbered reg, not r8+. 287 271 */ 288 272 if (insn->rex_prefix.nbytes) { 289 273 cursor = auprobe->insn + insn_offset_rex_prefix(insn); 290 - *cursor &= 0xfe; /* Clearing REX.B bit */ 274 + /* REX byte has 0100wrxb layout, clearing REX.b bit */ 275 + *cursor &= 0xfe; 276 + } 277 + /* 278 + * Similar treatment for VEX3 prefix. 279 + * TODO: add XOP/EVEX treatment when insn decoder supports them 280 + */ 281 + if (insn->vex_prefix.nbytes == 3) { 282 + /* 283 + * vex2: c5 rvvvvLpp (has no b bit) 284 + * vex3/xop: c4/8f rxbmmmmm wvvvvLpp 285 + * evex: 62 rxbR00mm wvvvv1pp zllBVaaa 286 + * (evex will need setting of both b and x since 287 + * in non-sib encoding evex.x is 4th bit of MODRM.rm) 288 + * Setting VEX3.b (setting because it has inverted meaning): 289 + */ 290 + cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1; 291 + *cursor |= 0x20; 291 292 } 292 293 294 + /* 295 + * Convert from rip-relative addressing to register-relative addressing 296 + * via a scratch register. 
297 + * 298 + * This is tricky since there are insns with modrm byte 299 + * which also use registers not encoded in modrm byte: 300 + * [i]div/[i]mul: implicitly use dx:ax 301 + * shift ops: implicitly use cx 302 + * cmpxchg: implicitly uses ax 303 + * cmpxchg8/16b: implicitly uses dx:ax and bx:cx 304 + * Encoding: 0f c7/1 modrm 305 + * The code below thinks that reg=1 (cx), chooses si as scratch. 306 + * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m. 307 + * First appeared in Haswell (BMI2 insn). It is vex-encoded. 308 + * Example where none of bx,cx,dx can be used as scratch reg: 309 + * c4 e2 63 f6 0d disp32 mulx disp32(%rip),%ebx,%ecx 310 + * [v]pcmpistri: implicitly uses cx, xmm0 311 + * [v]pcmpistrm: implicitly uses xmm0 312 + * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0 313 + * [v]pcmpestrm: implicitly uses ax, dx, xmm0 314 + * Evil SSE4.2 string comparison ops from hell. 315 + * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination. 316 + * Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm. 317 + * Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi). 318 + * AMD says it has no 3-operand form (vex.vvvv must be 1111) 319 + * and that it can have only register operands, not mem 320 + * (its modrm byte must have mode=11). 321 + * If these restrictions will ever be lifted, 322 + * we'll need code to prevent selection of di as scratch reg! 323 + * 324 + * Summary: I don't know any insns with modrm byte which 325 + * use SI register implicitly. DI register is used only 326 + * by one insn (maskmovq) and BX register is used 327 + * only by one too (cmpxchg8b). 328 + * BP is stack-segment based (may be a problem?). 329 + * AX, DX, CX are off-limits (many implicit users). 330 + * SP is unusable (it's stack pointer - think about "pop mem"; 331 + * also, rsp+disp32 needs sib encoding -> insn length change). 
332 + */ 333 + 334 + reg = MODRM_REG(insn); /* Fetch modrm.reg */ 335 + reg2 = 0xff; /* Fetch vex.vvvv */ 336 + if (insn->vex_prefix.nbytes == 2) 337 + reg2 = insn->vex_prefix.bytes[1]; 338 + else if (insn->vex_prefix.nbytes == 3) 339 + reg2 = insn->vex_prefix.bytes[2]; 340 + /* 341 + * TODO: add XOP, EXEV vvvv reading. 342 + * 343 + * vex.vvvv field is in bits 6-3, bits are inverted. 344 + * But in 32-bit mode, high-order bit may be ignored. 345 + * Therefore, let's consider only 3 low-order bits. 346 + */ 347 + reg2 = ((reg2 >> 3) & 0x7) ^ 0x7; 348 + /* 349 + * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15. 350 + * 351 + * Choose scratch reg. Order is important: must not select bx 352 + * if we can use si (cmpxchg8b case!) 353 + */ 354 + if (reg != 6 && reg2 != 6) { 355 + reg2 = 6; 356 + auprobe->defparam.fixups |= UPROBE_FIX_RIP_SI; 357 + } else if (reg != 7 && reg2 != 7) { 358 + reg2 = 7; 359 + auprobe->defparam.fixups |= UPROBE_FIX_RIP_DI; 360 + /* TODO (paranoia): force maskmovq to not use di */ 361 + } else { 362 + reg2 = 3; 363 + auprobe->defparam.fixups |= UPROBE_FIX_RIP_BX; 364 + } 293 365 /* 294 366 * Point cursor at the modrm byte. The next 4 bytes are the 295 367 * displacement. Beyond the displacement, for some instructions, 296 368 * is the immediate operand. 297 369 */ 298 370 cursor = auprobe->insn + insn_offset_modrm(insn); 299 - insn_get_length(insn); 300 - 301 371 /* 302 - * Convert from rip-relative addressing to indirect addressing 303 - * via a scratch register. Change the r/m field from 0x5 (%rip) 304 - * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field. 372 + * Change modrm from "00 reg 101" to "10 reg reg2". Example: 373 + * 89 05 disp32 mov %eax,disp32(%rip) becomes 374 + * 89 86 disp32 mov %eax,disp32(%rsi) 305 375 */ 306 - reg = MODRM_REG(insn); 307 - if (reg == 0) { 308 - /* 309 - * The register operand (if any) is either the A register 310 - * (%rax, %eax, etc.) 
or (if the 0x4 bit is set in the 311 - * REX prefix) %r8. In any case, we know the C register 312 - * is NOT the register operand, so we use %rcx (register 313 - * #1) for the scratch register. 314 - */ 315 - auprobe->fixups = UPROBE_FIX_RIP_CX; 316 - /* Change modrm from 00 000 101 to 00 000 001. */ 317 - *cursor = 0x1; 318 - } else { 319 - /* Use %rax (register #0) for the scratch register. */ 320 - auprobe->fixups = UPROBE_FIX_RIP_AX; 321 - /* Change modrm from 00 xxx 101 to 00 xxx 000 */ 322 - *cursor = (reg << 3); 323 - } 376 + *cursor = 0x80 | (reg << 3) | reg2; 377 + } 324 378 325 - /* Target address = address of next instruction + (signed) offset */ 326 - auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value; 327 - 328 - /* Displacement field is gone; slide immediate field (if any) over. */ 329 - if (insn->immediate.nbytes) { 330 - cursor++; 331 - memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes); 332 - } 379 + static inline unsigned long * 380 + scratch_reg(struct arch_uprobe *auprobe, struct pt_regs *regs) 381 + { 382 + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_SI) 383 + return &regs->si; 384 + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_DI) 385 + return &regs->di; 386 + return &regs->bx; 333 387 } 334 388 335 389 /* 336 390 * If we're emulating a rip-relative instruction, save the contents 337 391 * of the scratch register and store the target address in that register. 
338 392 */ 339 - static void 340 - pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, 341 - struct arch_uprobe_task *autask) 393 + static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) 342 394 { 343 - if (auprobe->fixups & UPROBE_FIX_RIP_AX) { 344 - autask->saved_scratch_register = regs->ax; 345 - regs->ax = current->utask->vaddr; 346 - regs->ax += auprobe->rip_rela_target_address; 347 - } else if (auprobe->fixups & UPROBE_FIX_RIP_CX) { 348 - autask->saved_scratch_register = regs->cx; 349 - regs->cx = current->utask->vaddr; 350 - regs->cx += auprobe->rip_rela_target_address; 395 + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) { 396 + struct uprobe_task *utask = current->utask; 397 + unsigned long *sr = scratch_reg(auprobe, regs); 398 + 399 + utask->autask.saved_scratch_register = *sr; 400 + *sr = utask->vaddr + auprobe->defparam.ilen; 351 401 } 352 402 } 353 403 354 - static void 355 - handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction) 404 + static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) 356 405 { 357 - if (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) { 358 - struct arch_uprobe_task *autask; 406 + if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) { 407 + struct uprobe_task *utask = current->utask; 408 + unsigned long *sr = scratch_reg(auprobe, regs); 359 409 360 - autask = &current->utask->autask; 361 - if (auprobe->fixups & UPROBE_FIX_RIP_AX) 362 - regs->ax = autask->saved_scratch_register; 363 - else 364 - regs->cx = autask->saved_scratch_register; 365 - 366 - /* 367 - * The original instruction includes a displacement, and so 368 - * is 4 bytes longer than what we've just single-stepped. 369 - * Caller may need to apply other fixups to handle stuff 370 - * like "jmpq *...(%rip)" and "callq *...(%rip)". 
371 - */ 372 - if (correction) 373 - *correction += 4; 410 + *sr = utask->autask.saved_scratch_register; 374 411 } 375 - } 376 - 377 - static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn) 378 - { 379 - insn_init(insn, auprobe->insn, true); 380 - 381 - /* Skip good instruction prefixes; reject "bad" ones. */ 382 - insn_get_opcode(insn); 383 - if (is_prefix_bad(insn)) 384 - return -ENOTSUPP; 385 - 386 - if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64)) 387 - return 0; 388 - 389 - if (insn->opcode.nbytes == 2) { 390 - if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns)) 391 - return 0; 392 - } 393 - return -ENOTSUPP; 394 - } 395 - 396 - static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) 397 - { 398 - if (mm->context.ia32_compat) 399 - return validate_insn_32bits(auprobe, insn); 400 - return validate_insn_64bits(auprobe, insn); 401 412 } 402 413 #else /* 32-bit: */ 414 + static inline bool is_64bit_mm(struct mm_struct *mm) 415 + { 416 + return false; 417 + } 403 418 /* 404 419 * No RIP-relative addressing on 32-bit 405 420 */ 406 - static void handle_riprel_insn(struct arch_uprobe *auprobe, struct insn *insn) 421 + static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn) 407 422 { 408 423 } 409 - static void pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs, 410 - struct arch_uprobe_task *autask) 424 + static void riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) 411 425 { 412 426 } 413 - static void handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, 414 - long *correction) 427 + static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) 415 428 { 416 - } 417 - 418 - static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn) 419 - { 420 - return validate_insn_32bits(auprobe, insn); 421 429 } 422 430 #endif /* CONFIG_X86_64 */ 423 431 ··· 
452 406 bool (*emulate)(struct arch_uprobe *, struct pt_regs *); 453 407 int (*pre_xol)(struct arch_uprobe *, struct pt_regs *); 454 408 int (*post_xol)(struct arch_uprobe *, struct pt_regs *); 409 + void (*abort)(struct arch_uprobe *, struct pt_regs *); 455 410 }; 456 411 457 412 static inline int sizeof_long(void) ··· 462 415 463 416 static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) 464 417 { 465 - pre_xol_rip_insn(auprobe, regs, &current->utask->autask); 418 + riprel_pre_xol(auprobe, regs); 419 + return 0; 420 + } 421 + 422 + static int push_ret_address(struct pt_regs *regs, unsigned long ip) 423 + { 424 + unsigned long new_sp = regs->sp - sizeof_long(); 425 + 426 + if (copy_to_user((void __user *)new_sp, &ip, sizeof_long())) 427 + return -EFAULT; 428 + 429 + regs->sp = new_sp; 466 430 return 0; 467 431 } 468 432 469 433 /* 470 - * Adjust the return address pushed by a call insn executed out of line. 434 + * We have to fix things up as follows: 435 + * 436 + * Typically, the new ip is relative to the copied instruction. We need 437 + * to make it relative to the original instruction (FIX_IP). Exceptions 438 + * are return instructions and absolute or indirect jump or call instructions. 439 + * 440 + * If the single-stepped instruction was a call, the return address that 441 + * is atop the stack is the address following the copied instruction. We 442 + * need to make it the address following the original instruction (FIX_CALL). 443 + * 444 + * If the original instruction was a rip-relative instruction such as 445 + * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent 446 + * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)". 447 + * We need to restore the contents of the scratch register 448 + * (FIX_RIP_reg). 
471 449 */ 472 - static int adjust_ret_addr(unsigned long sp, long correction) 450 + static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) 473 451 { 474 - int rasize = sizeof_long(); 475 - long ra; 452 + struct uprobe_task *utask = current->utask; 476 453 477 - if (copy_from_user(&ra, (void __user *)sp, rasize)) 478 - return -EFAULT; 479 - 480 - ra += correction; 481 - if (copy_to_user((void __user *)sp, &ra, rasize)) 482 - return -EFAULT; 454 + riprel_post_xol(auprobe, regs); 455 + if (auprobe->defparam.fixups & UPROBE_FIX_IP) { 456 + long correction = utask->vaddr - utask->xol_vaddr; 457 + regs->ip += correction; 458 + } else if (auprobe->defparam.fixups & UPROBE_FIX_CALL) { 459 + regs->sp += sizeof_long(); /* Pop incorrect return address */ 460 + if (push_ret_address(regs, utask->vaddr + auprobe->defparam.ilen)) 461 + return -ERESTART; 462 + } 463 + /* popf; tell the caller to not touch TF */ 464 + if (auprobe->defparam.fixups & UPROBE_FIX_SETF) 465 + utask->autask.saved_tf = true; 483 466 484 467 return 0; 485 468 } 486 469 487 - static int default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs) 470 + static void default_abort_op(struct arch_uprobe *auprobe, struct pt_regs *regs) 488 471 { 489 - struct uprobe_task *utask = current->utask; 490 - long correction = (long)(utask->vaddr - utask->xol_vaddr); 491 - 492 - handle_riprel_post_xol(auprobe, regs, &correction); 493 - if (auprobe->fixups & UPROBE_FIX_IP) 494 - regs->ip += correction; 495 - 496 - if (auprobe->fixups & UPROBE_FIX_CALL) { 497 - if (adjust_ret_addr(regs->sp, correction)) { 498 - regs->sp += sizeof_long(); 499 - return -ERESTART; 500 - } 501 - } 502 - 503 - return 0; 472 + riprel_post_xol(auprobe, regs); 504 473 } 505 474 506 475 static struct uprobe_xol_ops default_xol_ops = { 507 476 .pre_xol = default_pre_xol_op, 508 477 .post_xol = default_post_xol_op, 478 + .abort = default_abort_op, 509 479 }; 510 480 511 481 static bool branch_is_call(struct 
arch_uprobe *auprobe) ··· 584 520 unsigned long offs = (long)auprobe->branch.offs; 585 521 586 522 if (branch_is_call(auprobe)) { 587 - unsigned long new_sp = regs->sp - sizeof_long(); 588 523 /* 589 524 * If it fails we execute this (mangled, see the comment in 590 525 * branch_clear_offset) insn out-of-line. In the likely case ··· 593 530 * 594 531 * But there is corner case, see the comment in ->post_xol(). 595 532 */ 596 - if (copy_to_user((void __user *)new_sp, &new_ip, sizeof_long())) 533 + if (push_ret_address(regs, new_ip)) 597 534 return false; 598 - regs->sp = new_sp; 599 535 } else if (!check_jmp_cond(auprobe, regs)) { 600 536 offs = 0; 601 537 } ··· 645 583 static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn) 646 584 { 647 585 u8 opc1 = OPCODE1(insn); 648 - 649 - /* has the side-effect of processing the entire instruction */ 650 - insn_get_length(insn); 651 - if (WARN_ON_ONCE(!insn_complete(insn))) 652 - return -ENOEXEC; 586 + int i; 653 587 654 588 switch (opc1) { 655 589 case 0xeb: /* jmp 8 */ ··· 670 612 return -ENOSYS; 671 613 } 672 614 615 + /* 616 + * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported. 617 + * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. 618 + * No one uses these insns, reject any branch insns with such prefix. 
619 + */ 620 + for (i = 0; i < insn->prefixes.nbytes; i++) { 621 + if (insn->prefixes.bytes[i] == 0x66) 622 + return -ENOTSUPP; 623 + } 624 + 673 625 auprobe->branch.opc1 = opc1; 674 626 auprobe->branch.ilen = insn->length; 675 627 auprobe->branch.offs = insn->immediate.value; ··· 698 630 int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr) 699 631 { 700 632 struct insn insn; 701 - bool fix_ip = true, fix_call = false; 633 + u8 fix_ip_or_call = UPROBE_FIX_IP; 702 634 int ret; 703 635 704 - ret = validate_insn_bits(auprobe, mm, &insn); 636 + ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm)); 705 637 if (ret) 706 638 return ret; 707 639 ··· 710 642 return ret; 711 643 712 644 /* 713 - * Figure out which fixups arch_uprobe_post_xol() will need to perform, 714 - * and annotate arch_uprobe->fixups accordingly. To start with, ->fixups 715 - * is either zero or it reflects rip-related fixups. 645 + * Figure out which fixups default_post_xol_op() will need to perform, 646 + * and annotate defparam->fixups accordingly. 
716 647 */ 717 648 switch (OPCODE1(&insn)) { 718 649 case 0x9d: /* popf */ 719 - auprobe->fixups |= UPROBE_FIX_SETF; 650 + auprobe->defparam.fixups |= UPROBE_FIX_SETF; 720 651 break; 721 652 case 0xc3: /* ret or lret -- ip is correct */ 722 653 case 0xcb: 723 654 case 0xc2: 724 655 case 0xca: 725 - fix_ip = false; 656 + case 0xea: /* jmp absolute -- ip is correct */ 657 + fix_ip_or_call = 0; 726 658 break; 727 659 case 0x9a: /* call absolute - Fix return addr, not ip */ 728 - fix_call = true; 729 - fix_ip = false; 730 - break; 731 - case 0xea: /* jmp absolute -- ip is correct */ 732 - fix_ip = false; 660 + fix_ip_or_call = UPROBE_FIX_CALL; 733 661 break; 734 662 case 0xff: 735 - insn_get_modrm(&insn); 736 663 switch (MODRM_REG(&insn)) { 737 664 case 2: case 3: /* call or lcall, indirect */ 738 - fix_call = true; 665 + fix_ip_or_call = UPROBE_FIX_CALL; 666 + break; 739 667 case 4: case 5: /* jmp or ljmp, indirect */ 740 - fix_ip = false; 668 + fix_ip_or_call = 0; 669 + break; 741 670 } 742 671 /* fall through */ 743 672 default: 744 - handle_riprel_insn(auprobe, &insn); 673 + riprel_analyze(auprobe, &insn); 745 674 } 746 675 747 - if (fix_ip) 748 - auprobe->fixups |= UPROBE_FIX_IP; 749 - if (fix_call) 750 - auprobe->fixups |= UPROBE_FIX_CALL; 676 + auprobe->defparam.ilen = insn.length; 677 + auprobe->defparam.fixups |= fix_ip_or_call; 751 678 752 679 auprobe->ops = &default_xol_ops; 753 680 return 0; ··· 757 694 { 758 695 struct uprobe_task *utask = current->utask; 759 696 697 + if (auprobe->ops->pre_xol) { 698 + int err = auprobe->ops->pre_xol(auprobe, regs); 699 + if (err) 700 + return err; 701 + } 702 + 760 703 regs->ip = utask->xol_vaddr; 761 704 utask->autask.saved_trap_nr = current->thread.trap_nr; 762 705 current->thread.trap_nr = UPROBE_TRAP_NR; ··· 772 703 if (test_tsk_thread_flag(current, TIF_BLOCKSTEP)) 773 704 set_task_blockstep(current, false); 774 705 775 - if (auprobe->ops->pre_xol) 776 - return auprobe->ops->pre_xol(auprobe, regs); 777 706 return 0; 
778 707 } 779 708 ··· 799 732 * single-step, we single-stepped a copy of the instruction. 800 733 * 801 734 * This function prepares to resume execution after the single-step. 802 - * We have to fix things up as follows: 803 - * 804 - * Typically, the new ip is relative to the copied instruction. We need 805 - * to make it relative to the original instruction (FIX_IP). Exceptions 806 - * are return instructions and absolute or indirect jump or call instructions. 807 - * 808 - * If the single-stepped instruction was a call, the return address that 809 - * is atop the stack is the address following the copied instruction. We 810 - * need to make it the address following the original instruction (FIX_CALL). 811 - * 812 - * If the original instruction was a rip-relative instruction such as 813 - * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent 814 - * instruction using a scratch register -- e.g., "movl %edx,(%rax)". 815 - * We need to restore the contents of the scratch register and adjust 816 - * the ip, keeping in mind that the instruction we executed is 4 bytes 817 - * shorter than the original instruction (since we squeezed out the offset 818 - * field). (FIX_RIP_AX or FIX_RIP_CX) 819 735 */ 820 736 int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) 821 737 { 822 738 struct uprobe_task *utask = current->utask; 739 + bool send_sigtrap = utask->autask.saved_tf; 740 + int err = 0; 823 741 824 742 WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR); 743 + current->thread.trap_nr = utask->autask.saved_trap_nr; 825 744 826 745 if (auprobe->ops->post_xol) { 827 - int err = auprobe->ops->post_xol(auprobe, regs); 746 + err = auprobe->ops->post_xol(auprobe, regs); 828 747 if (err) { 829 - arch_uprobe_abort_xol(auprobe, regs); 830 748 /* 831 - * Restart the probed insn. ->post_xol() must ensure 832 - * this is really possible if it returns -ERESTART. 749 + * Restore ->ip for restart or post mortem analysis. 
750 + * ->post_xol() must not return -ERESTART unless this 751 + * is really possible. 833 752 */ 753 + regs->ip = utask->vaddr; 834 754 if (err == -ERESTART) 835 - return 0; 836 - return err; 755 + err = 0; 756 + send_sigtrap = false; 837 757 } 838 758 } 839 - 840 - current->thread.trap_nr = utask->autask.saved_trap_nr; 841 759 /* 842 760 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP 843 761 * so we can get an extra SIGTRAP if we do not clear TF. We need 844 762 * to examine the opcode to make it right. 845 763 */ 846 - if (utask->autask.saved_tf) 764 + if (send_sigtrap) 847 765 send_sig(SIGTRAP, current, 0); 848 - else if (!(auprobe->fixups & UPROBE_FIX_SETF)) 766 + 767 + if (!utask->autask.saved_tf) 849 768 regs->flags &= ~X86_EFLAGS_TF; 850 769 851 - return 0; 770 + return err; 852 771 } 853 772 854 773 /* callback routine for handling exceptions. */ ··· 868 815 869 816 /* 870 817 * This function gets called when XOL instruction either gets trapped or 871 - * the thread has a fatal signal, or if arch_uprobe_post_xol() failed. 872 - * Reset the instruction pointer to its probed address for the potential 873 - * restart or for post mortem analysis. 818 + * the thread has a fatal signal. Reset the instruction pointer to its 819 + * probed address for the potential restart or for post mortem analysis. 874 820 */ 875 821 void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) 876 822 { 877 823 struct uprobe_task *utask = current->utask; 878 824 879 - current->thread.trap_nr = utask->autask.saved_trap_nr; 880 - handle_riprel_post_xol(auprobe, regs, NULL); 881 - instruction_pointer_set(regs, utask->vaddr); 825 + if (auprobe->ops->abort) 826 + auprobe->ops->abort(auprobe, regs); 882 827 828 + current->thread.trap_nr = utask->autask.saved_trap_nr; 829 + regs->ip = utask->vaddr; 883 830 /* clear TF if it was set by us in arch_uprobe_pre_xol() */ 884 831 if (!utask->autask.saved_tf) 885 832 regs->flags &= ~X86_EFLAGS_TF;
+2 -1
arch/x86/lib/thunk_32.S
··· 4 4 * (inspired by Andi Kleen's thunk_64.S) 5 5 * Subject to the GNU public license, v.2. No warranty of any kind. 6 6 */ 7 - 8 7 #include <linux/linkage.h> 8 + #include <asm/asm.h> 9 9 10 10 #ifdef CONFIG_TRACE_IRQFLAGS 11 11 /* put return address in eax (arg1) */ ··· 22 22 popl %ecx 23 23 popl %eax 24 24 ret 25 + _ASM_NOKPROBE(\name) 25 26 .endm 26 27 27 28 thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller
+3
arch/x86/lib/thunk_64.S
··· 8 8 #include <linux/linkage.h> 9 9 #include <asm/dwarf2.h> 10 10 #include <asm/calling.h> 11 + #include <asm/asm.h> 11 12 12 13 /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ 13 14 .macro THUNK name, func, put_ret_addr_in_rdi=0 ··· 26 25 call \func 27 26 jmp restore 28 27 CFI_ENDPROC 28 + _ASM_NOKPROBE(\name) 29 29 .endm 30 30 31 31 #ifdef CONFIG_TRACE_IRQFLAGS ··· 45 43 RESTORE_ARGS 46 44 ret 47 45 CFI_ENDPROC 46 + _ASM_NOKPROBE(restore)
+18 -11
arch/x86/mm/fault.c
··· 8 8 #include <linux/kdebug.h> /* oops_begin/end, ... */ 9 9 #include <linux/module.h> /* search_exception_table */ 10 10 #include <linux/bootmem.h> /* max_low_pfn */ 11 - #include <linux/kprobes.h> /* __kprobes, ... */ 11 + #include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ 12 12 #include <linux/mmiotrace.h> /* kmmio_handler, ... */ 13 13 #include <linux/perf_event.h> /* perf_sw_event */ 14 14 #include <linux/hugetlb.h> /* hstate_index_to_shift */ ··· 46 46 * Returns 0 if mmiotrace is disabled, or if the fault is not 47 47 * handled by mmiotrace: 48 48 */ 49 - static inline int __kprobes 49 + static nokprobe_inline int 50 50 kmmio_fault(struct pt_regs *regs, unsigned long addr) 51 51 { 52 52 if (unlikely(is_kmmio_active())) ··· 55 55 return 0; 56 56 } 57 57 58 - static inline int __kprobes kprobes_fault(struct pt_regs *regs) 58 + static nokprobe_inline int kprobes_fault(struct pt_regs *regs) 59 59 { 60 60 int ret = 0; 61 61 ··· 262 262 * 263 263 * Handle a fault on the vmalloc or module mapping area 264 264 */ 265 - static noinline __kprobes int vmalloc_fault(unsigned long address) 265 + static noinline int vmalloc_fault(unsigned long address) 266 266 { 267 267 unsigned long pgd_paddr; 268 268 pmd_t *pmd_k; ··· 292 292 293 293 return 0; 294 294 } 295 + NOKPROBE_SYMBOL(vmalloc_fault); 295 296 296 297 /* 297 298 * Did it hit the DOS screen memory VA from vm86 mode? ··· 360 359 * 361 360 * This assumes no large pages in there. 362 361 */ 363 - static noinline __kprobes int vmalloc_fault(unsigned long address) 362 + static noinline int vmalloc_fault(unsigned long address) 364 363 { 365 364 pgd_t *pgd, *pgd_ref; 366 365 pud_t *pud, *pud_ref; ··· 427 426 428 427 return 0; 429 428 } 429 + NOKPROBE_SYMBOL(vmalloc_fault); 430 430 431 431 #ifdef CONFIG_CPU_SUP_AMD 432 432 static const char errata93_warning[] = ··· 930 928 * There are no security implications to leaving a stale TLB when 931 929 * increasing the permissions on a page. 
932 930 */ 933 - static noinline __kprobes int 931 + static noinline int 934 932 spurious_fault(unsigned long error_code, unsigned long address) 935 933 { 936 934 pgd_t *pgd; ··· 978 976 979 977 return ret; 980 978 } 979 + NOKPROBE_SYMBOL(spurious_fault); 981 980 982 981 int show_unhandled_signals = 1; 983 982 ··· 1034 1031 * {,trace_}do_page_fault() have notrace on. Having this an actual function 1035 1032 * guarantees there's a function trace entry. 1036 1033 */ 1037 - static void __kprobes noinline 1034 + static noinline void 1038 1035 __do_page_fault(struct pt_regs *regs, unsigned long error_code, 1039 1036 unsigned long address) 1040 1037 { ··· 1257 1254 1258 1255 up_read(&mm->mmap_sem); 1259 1256 } 1257 + NOKPROBE_SYMBOL(__do_page_fault); 1260 1258 1261 - dotraplinkage void __kprobes notrace 1259 + dotraplinkage void notrace 1262 1260 do_page_fault(struct pt_regs *regs, unsigned long error_code) 1263 1261 { 1264 1262 unsigned long address = read_cr2(); /* Get the faulting address */ ··· 1277 1273 __do_page_fault(regs, error_code, address); 1278 1274 exception_exit(prev_state); 1279 1275 } 1276 + NOKPROBE_SYMBOL(do_page_fault); 1280 1277 1281 1278 #ifdef CONFIG_TRACING 1282 - static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs, 1283 - unsigned long error_code) 1279 + static nokprobe_inline void 1280 + trace_page_fault_entries(unsigned long address, struct pt_regs *regs, 1281 + unsigned long error_code) 1284 1282 { 1285 1283 if (user_mode(regs)) 1286 1284 trace_page_fault_user(address, regs, error_code); ··· 1290 1284 trace_page_fault_kernel(address, regs, error_code); 1291 1285 } 1292 1286 1293 - dotraplinkage void __kprobes notrace 1287 + dotraplinkage void notrace 1294 1288 trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) 1295 1289 { 1296 1290 /* ··· 1307 1301 __do_page_fault(regs, error_code, address); 1308 1302 exception_exit(prev_state); 1309 1303 } 1304 + NOKPROBE_SYMBOL(trace_do_page_fault); 1310 1305 
#endif /* CONFIG_TRACING */
+4 -3
fs/exec.c
··· 1046 1046 * so that a new one can be started 1047 1047 */ 1048 1048 1049 - void set_task_comm(struct task_struct *tsk, const char *buf) 1049 + void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) 1050 1050 { 1051 1051 task_lock(tsk); 1052 1052 trace_task_rename(tsk, buf); 1053 1053 strlcpy(tsk->comm, buf, sizeof(tsk->comm)); 1054 1054 task_unlock(tsk); 1055 - perf_event_comm(tsk); 1055 + perf_event_comm(tsk, exec); 1056 1056 } 1057 1057 1058 1058 int flush_old_exec(struct linux_binprm * bprm) ··· 1110 1110 else 1111 1111 set_dumpable(current->mm, suid_dumpable); 1112 1112 1113 - set_task_comm(current, kbasename(bprm->filename)); 1113 + perf_event_exec(); 1114 + __set_task_comm(current, kbasename(bprm->filename), true); 1114 1115 1115 1116 /* Set the new mm task size. We have to do that late because it may 1116 1117 * depend on TIF_32BIT which is only updated in flush_thread() on
+10
include/asm-generic/vmlinux.lds.h
··· 109 109 #define BRANCH_PROFILE() 110 110 #endif 111 111 112 + #ifdef CONFIG_KPROBES 113 + #define KPROBE_BLACKLIST() . = ALIGN(8); \ 114 + VMLINUX_SYMBOL(__start_kprobe_blacklist) = .; \ 115 + *(_kprobe_blacklist) \ 116 + VMLINUX_SYMBOL(__stop_kprobe_blacklist) = .; 117 + #else 118 + #define KPROBE_BLACKLIST() 119 + #endif 120 + 112 121 #ifdef CONFIG_EVENT_TRACING 113 122 #define FTRACE_EVENTS() . = ALIGN(8); \ 114 123 VMLINUX_SYMBOL(__start_ftrace_events) = .; \ ··· 487 478 *(.init.rodata) \ 488 479 FTRACE_EVENTS() \ 489 480 TRACE_SYSCALLS() \ 481 + KPROBE_BLACKLIST() \ 490 482 MEM_DISCARD(init.rodata) \ 491 483 CLK_OF_TABLES() \ 492 484 RESERVEDMEM_OF_TABLES() \
+2
include/linux/compiler.h
··· 383 383 /* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ 384 384 #ifdef CONFIG_KPROBES 385 385 # define __kprobes __attribute__((__section__(".kprobes.text"))) 386 + # define nokprobe_inline __always_inline 386 387 #else 387 388 # define __kprobes 389 + # define nokprobe_inline inline 388 390 #endif 389 391 #endif /* __LINUX_COMPILER_H */
+18 -3
include/linux/kprobes.h
··· 205 205 void *addr; 206 206 }; 207 207 208 - struct kprobe_blackpoint { 209 - const char *name; 208 + struct kprobe_blacklist_entry { 209 + struct list_head list; 210 210 unsigned long start_addr; 211 - unsigned long range; 211 + unsigned long end_addr; 212 212 }; 213 213 214 214 #ifdef CONFIG_KPROBES ··· 265 265 extern int arch_init_kprobes(void); 266 266 extern void show_registers(struct pt_regs *regs); 267 267 extern void kprobes_inc_nmissed_count(struct kprobe *p); 268 + extern bool arch_within_kprobe_blacklist(unsigned long addr); 268 269 269 270 struct kprobe_insn_cache { 270 271 struct mutex mutex; ··· 476 475 { 477 476 return enable_kprobe(&jp->kp); 478 477 } 478 + 479 + #ifdef CONFIG_KPROBES 480 + /* 481 + * Blacklist generating macro. Specify functions which are not probed 482 + * by using this macro. 483 + */ 484 + #define __NOKPROBE_SYMBOL(fname) \ 485 + static unsigned long __used \ 486 + __attribute__((section("_kprobe_blacklist"))) \ 487 + _kbl_addr_##fname = (unsigned long)fname; 488 + #define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname) 489 + #else 490 + #define NOKPROBE_SYMBOL(fname) 491 + #endif 479 492 480 493 #endif /* _LINUX_KPROBES_H */
+15 -3
include/linux/perf_event.h
··· 167 167 #define PERF_EVENT_TXN 0x1 168 168 169 169 /** 170 + * pmu::capabilities flags 171 + */ 172 + #define PERF_PMU_CAP_NO_INTERRUPT 0x01 173 + 174 + /** 170 175 * struct pmu - generic performance monitoring unit 171 176 */ 172 177 struct pmu { ··· 182 177 const struct attribute_group **attr_groups; 183 178 const char *name; 184 179 int type; 180 + 181 + /* 182 + * various common per-pmu feature flags 183 + */ 184 + int capabilities; 185 185 186 186 int * __percpu pmu_disable_count; 187 187 struct perf_cpu_context * __percpu pmu_cpu_context; ··· 706 696 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); 707 697 extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); 708 698 709 - extern void perf_event_comm(struct task_struct *tsk); 699 + extern void perf_event_exec(void); 700 + extern void perf_event_comm(struct task_struct *tsk, bool exec); 710 701 extern void perf_event_fork(struct task_struct *tsk); 711 702 712 703 /* Callchains */ ··· 784 773 extern void perf_event_disable(struct perf_event *event); 785 774 extern int __perf_event_disable(void *info); 786 775 extern void perf_event_task_tick(void); 787 - #else 776 + #else /* !CONFIG_PERF_EVENTS: */ 788 777 static inline void 789 778 perf_event_task_sched_in(struct task_struct *prev, 790 779 struct task_struct *task) { } ··· 814 803 (struct perf_guest_info_callbacks *callbacks) { return 0; } 815 804 816 805 static inline void perf_event_mmap(struct vm_area_struct *vma) { } 817 - static inline void perf_event_comm(struct task_struct *tsk) { } 806 + static inline void perf_event_exec(void) { } 807 + static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } 818 808 static inline void perf_event_fork(struct task_struct *tsk) { } 819 809 static inline void perf_event_init(void) { } 820 810 static inline int perf_swevent_get_recursion_context(void) { return -1; }
+5 -1
include/linux/sched.h
··· 2421 2421 struct task_struct *fork_idle(int); 2422 2422 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); 2423 2423 2424 - extern void set_task_comm(struct task_struct *tsk, const char *from); 2424 + extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); 2425 + static inline void set_task_comm(struct task_struct *tsk, const char *from) 2426 + { 2427 + __set_task_comm(tsk, from, false); 2428 + } 2425 2429 extern char *get_task_comm(char *to, struct task_struct *tsk); 2426 2430 2427 2431 #ifdef CONFIG_SMP
+4
include/linux/uprobes.h
··· 103 103 extern bool __weak is_swbp_insn(uprobe_opcode_t *insn); 104 104 extern bool __weak is_trap_insn(uprobe_opcode_t *insn); 105 105 extern unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs); 106 + extern unsigned long uprobe_get_trap_addr(struct pt_regs *regs); 106 107 extern int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t); 107 108 extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc); 108 109 extern int uprobe_apply(struct inode *inode, loff_t offset, struct uprobe_consumer *uc, bool); ··· 134 133 #else /* !CONFIG_UPROBES */ 135 134 struct uprobes_state { 136 135 }; 136 + 137 + #define uprobe_get_trap_addr(regs) instruction_pointer(regs) 138 + 137 139 static inline int 138 140 uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc) 139 141 {
+9 -3
include/uapi/linux/perf_event.h
··· 163 163 PERF_SAMPLE_BRANCH_ABORT_TX = 1U << 7, /* transaction aborts */ 164 164 PERF_SAMPLE_BRANCH_IN_TX = 1U << 8, /* in transaction */ 165 165 PERF_SAMPLE_BRANCH_NO_TX = 1U << 9, /* not in transaction */ 166 + PERF_SAMPLE_BRANCH_COND = 1U << 10, /* conditional branches */ 166 167 167 - PERF_SAMPLE_BRANCH_MAX = 1U << 10, /* non-ABI */ 168 + PERF_SAMPLE_BRANCH_MAX = 1U << 11, /* non-ABI */ 168 169 }; 169 170 170 171 #define PERF_SAMPLE_BRANCH_PLM_ALL \ ··· 302 301 exclude_callchain_kernel : 1, /* exclude kernel callchains */ 303 302 exclude_callchain_user : 1, /* exclude user callchains */ 304 303 mmap2 : 1, /* include mmap with inode data */ 305 - 306 - __reserved_1 : 40; 304 + comm_exec : 1, /* flag comm events that are due to an exec */ 305 + __reserved_1 : 39; 307 306 308 307 union { 309 308 __u32 wakeup_events; /* wakeup every n events */ ··· 502 501 #define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) 503 502 #define PERF_RECORD_MISC_GUEST_USER (5 << 0) 504 503 504 + /* 505 + * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on 506 + * different events so can reuse the same bit position. 507 + */ 505 508 #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) 509 + #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) 506 510 /* 507 511 * Indicates that the content of PERF_SAMPLE_IP points to 508 512 * the actual instruction that triggered the event. See also
+27 -16
kernel/events/core.c
··· 2974 2974 local_irq_restore(flags); 2975 2975 } 2976 2976 2977 + void perf_event_exec(void) 2978 + { 2979 + struct perf_event_context *ctx; 2980 + int ctxn; 2981 + 2982 + rcu_read_lock(); 2983 + for_each_task_context_nr(ctxn) { 2984 + ctx = current->perf_event_ctxp[ctxn]; 2985 + if (!ctx) 2986 + continue; 2987 + 2988 + perf_event_enable_on_exec(ctx); 2989 + } 2990 + rcu_read_unlock(); 2991 + } 2992 + 2977 2993 /* 2978 2994 * Cross CPU call to read the hardware event 2979 2995 */ ··· 5091 5075 NULL); 5092 5076 } 5093 5077 5094 - void perf_event_comm(struct task_struct *task) 5078 + void perf_event_comm(struct task_struct *task, bool exec) 5095 5079 { 5096 5080 struct perf_comm_event comm_event; 5097 - struct perf_event_context *ctx; 5098 - int ctxn; 5099 - 5100 - rcu_read_lock(); 5101 - for_each_task_context_nr(ctxn) { 5102 - ctx = task->perf_event_ctxp[ctxn]; 5103 - if (!ctx) 5104 - continue; 5105 - 5106 - perf_event_enable_on_exec(ctx); 5107 - } 5108 - rcu_read_unlock(); 5109 5081 5110 5082 if (!atomic_read(&nr_comm_events)) 5111 5083 return; ··· 5105 5101 .event_id = { 5106 5102 .header = { 5107 5103 .type = PERF_RECORD_COMM, 5108 - .misc = 0, 5104 + .misc = exec ? 
PERF_RECORD_MISC_COMM_EXEC : 0, 5109 5105 /* .size */ 5110 5106 }, 5111 5107 /* .pid */ ··· 7126 7122 } 7127 7123 } 7128 7124 7125 + if (is_sampling_event(event)) { 7126 + if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { 7127 + err = -ENOTSUPP; 7128 + goto err_alloc; 7129 + } 7130 + } 7131 + 7129 7132 account_event(event); 7130 7133 7131 7134 /* ··· 7444 7433 7445 7434 static void perf_event_exit_task_context(struct task_struct *child, int ctxn) 7446 7435 { 7447 - struct perf_event *child_event; 7436 + struct perf_event *child_event, *next; 7448 7437 struct perf_event_context *child_ctx; 7449 7438 unsigned long flags; 7450 7439 ··· 7498 7487 */ 7499 7488 mutex_lock(&child_ctx->mutex); 7500 7489 7501 - list_for_each_entry_rcu(child_event, &child_ctx->event_list, event_entry) 7490 + list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry) 7502 7491 __perf_event_exit_task(child_event, child_ctx, child); 7503 7492 7504 7493 mutex_unlock(&child_ctx->mutex);
+33 -19
kernel/events/uprobes.c
··· 36 36 #include "../../mm/internal.h" /* munlock_vma_page */ 37 37 #include <linux/percpu-rwsem.h> 38 38 #include <linux/task_work.h> 39 + #include <linux/shmem_fs.h> 39 40 40 41 #include <linux/uprobes.h> 41 42 ··· 128 127 */ 129 128 static bool valid_vma(struct vm_area_struct *vma, bool is_register) 130 129 { 131 - vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_SHARED; 130 + vm_flags_t flags = VM_HUGETLB | VM_MAYEXEC | VM_MAYSHARE; 132 131 133 132 if (is_register) 134 133 flags |= VM_WRITE; ··· 280 279 * supported by that architecture then we need to modify is_trap_at_addr and 281 280 * uprobe_write_opcode accordingly. This would never be a problem for archs 282 281 * that have fixed length instructions. 283 - */ 284 - 285 - /* 282 + * 286 283 * uprobe_write_opcode - write the opcode at a given virtual address. 287 284 * @mm: the probed process address space. 288 285 * @vaddr: the virtual address to store the opcode. 289 286 * @opcode: opcode to be written at @vaddr. 290 287 * 291 - * Called with mm->mmap_sem held (for read and with a reference to 292 - * mm). 293 - * 294 - * For mm @mm, write the opcode at @vaddr. 288 + * Called with mm->mmap_sem held for write. 295 289 * Return 0 (success) or a negative errno. 
296 290 */ 297 291 int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr, ··· 306 310 if (ret <= 0) 307 311 goto put_old; 308 312 313 + ret = anon_vma_prepare(vma); 314 + if (ret) 315 + goto put_old; 316 + 309 317 ret = -ENOMEM; 310 318 new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr); 311 319 if (!new_page) 312 320 goto put_old; 313 321 314 - __SetPageUptodate(new_page); 322 + if (mem_cgroup_charge_anon(new_page, mm, GFP_KERNEL)) 323 + goto put_new; 315 324 325 + __SetPageUptodate(new_page); 316 326 copy_highpage(new_page, old_page); 317 327 copy_to_page(new_page, vaddr, &opcode, UPROBE_SWBP_INSN_SIZE); 318 328 319 - ret = anon_vma_prepare(vma); 320 - if (ret) 321 - goto put_new; 322 - 323 329 ret = __replace_page(vma, vaddr, old_page, new_page); 330 + if (ret) 331 + mem_cgroup_uncharge_page(new_page); 324 332 325 333 put_new: 326 334 page_cache_release(new_page); ··· 537 537 void *insn, int nbytes, loff_t offset) 538 538 { 539 539 struct page *page; 540 - 541 - if (!mapping->a_ops->readpage) 542 - return -EIO; 543 540 /* 544 - * Ensure that the page that has the original instruction is 545 - * populated and in page-cache. 541 + * Ensure that the page that has the original instruction is populated 542 + * and in page-cache. If ->readpage == NULL it must be shmem_mapping(), 543 + * see uprobe_register(). 
546 544 */ 547 - page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp); 545 + if (mapping->a_ops->readpage) 546 + page = read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT, filp); 547 + else 548 + page = shmem_read_mapping_page(mapping, offset >> PAGE_CACHE_SHIFT); 548 549 if (IS_ERR(page)) 549 550 return PTR_ERR(page); 550 551 ··· 881 880 if (!uc->handler && !uc->ret_handler) 882 881 return -EINVAL; 883 882 883 + /* copy_insn() uses read_mapping_page() or shmem_read_mapping_page() */ 884 + if (!inode->i_mapping->a_ops->readpage && !shmem_mapping(inode->i_mapping)) 885 + return -EIO; 884 886 /* Racy, just to catch the obvious mistakes */ 885 887 if (offset > i_size_read(inode)) 886 888 return -EINVAL; ··· 1363 1359 unsigned long __weak uprobe_get_swbp_addr(struct pt_regs *regs) 1364 1360 { 1365 1361 return instruction_pointer(regs) - UPROBE_SWBP_INSN_SIZE; 1362 + } 1363 + 1364 + unsigned long uprobe_get_trap_addr(struct pt_regs *regs) 1365 + { 1366 + struct uprobe_task *utask = current->utask; 1367 + 1368 + if (unlikely(utask && utask->active_uprobe)) 1369 + return utask->vaddr; 1370 + 1371 + return instruction_pointer(regs); 1366 1372 } 1367 1373 1368 1374 /*
+229 -163
kernel/kprobes.c
··· 86 86 return &(kretprobe_table_locks[hash].lock); 87 87 } 88 88 89 - /* 90 - * Normally, functions that we'd want to prohibit kprobes in, are marked 91 - * __kprobes. But, there are cases where such functions already belong to 92 - * a different section (__sched for preempt_schedule) 93 - * 94 - * For such cases, we now have a blacklist 95 - */ 96 - static struct kprobe_blackpoint kprobe_blacklist[] = { 97 - {"preempt_schedule",}, 98 - {"native_get_debugreg",}, 99 - {"irq_entries_start",}, 100 - {"common_interrupt",}, 101 - {"mcount",}, /* mcount can be called from everywhere */ 102 - {NULL} /* Terminator */ 103 - }; 89 + /* Blacklist -- list of struct kprobe_blacklist_entry */ 90 + static LIST_HEAD(kprobe_blacklist); 104 91 105 92 #ifdef __ARCH_WANT_KPROBES_INSN_SLOT 106 93 /* ··· 138 151 .insn_size = MAX_INSN_SIZE, 139 152 .nr_garbage = 0, 140 153 }; 141 - static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c); 154 + static int collect_garbage_slots(struct kprobe_insn_cache *c); 142 155 143 156 /** 144 157 * __get_insn_slot() - Find a slot on an executable page for an instruction. 145 158 * We allocate an executable page if there's no room on existing ones. 146 159 */ 147 - kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c) 160 + kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c) 148 161 { 149 162 struct kprobe_insn_page *kip; 150 163 kprobe_opcode_t *slot = NULL; ··· 201 214 } 202 215 203 216 /* Return 1 if all garbages are collected, otherwise 0. 
*/ 204 - static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx) 217 + static int collect_one_slot(struct kprobe_insn_page *kip, int idx) 205 218 { 206 219 kip->slot_used[idx] = SLOT_CLEAN; 207 220 kip->nused--; ··· 222 235 return 0; 223 236 } 224 237 225 - static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c) 238 + static int collect_garbage_slots(struct kprobe_insn_cache *c) 226 239 { 227 240 struct kprobe_insn_page *kip, *next; 228 241 ··· 244 257 return 0; 245 258 } 246 259 247 - void __kprobes __free_insn_slot(struct kprobe_insn_cache *c, 248 - kprobe_opcode_t *slot, int dirty) 260 + void __free_insn_slot(struct kprobe_insn_cache *c, 261 + kprobe_opcode_t *slot, int dirty) 249 262 { 250 263 struct kprobe_insn_page *kip; 251 264 ··· 301 314 * OR 302 315 * - with preemption disabled - from arch/xxx/kernel/kprobes.c 303 316 */ 304 - struct kprobe __kprobes *get_kprobe(void *addr) 317 + struct kprobe *get_kprobe(void *addr) 305 318 { 306 319 struct hlist_head *head; 307 320 struct kprobe *p; ··· 314 327 315 328 return NULL; 316 329 } 330 + NOKPROBE_SYMBOL(get_kprobe); 317 331 318 - static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs); 332 + static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs); 319 333 320 334 /* Return true if the kprobe is an aggregator */ 321 335 static inline int kprobe_aggrprobe(struct kprobe *p) ··· 348 360 * Call all pre_handler on the list, but ignores its return value. 349 361 * This must be called from arch-dep optimized caller. 
350 362 */ 351 - void __kprobes opt_pre_handler(struct kprobe *p, struct pt_regs *regs) 363 + void opt_pre_handler(struct kprobe *p, struct pt_regs *regs) 352 364 { 353 365 struct kprobe *kp; 354 366 ··· 360 372 reset_kprobe_instance(); 361 373 } 362 374 } 375 + NOKPROBE_SYMBOL(opt_pre_handler); 363 376 364 377 /* Free optimized instructions and optimized_kprobe */ 365 - static __kprobes void free_aggr_kprobe(struct kprobe *p) 378 + static void free_aggr_kprobe(struct kprobe *p) 366 379 { 367 380 struct optimized_kprobe *op; 368 381 ··· 401 412 } 402 413 403 414 /* Return true(!0) if the probe is queued on (un)optimizing lists */ 404 - static int __kprobes kprobe_queued(struct kprobe *p) 415 + static int kprobe_queued(struct kprobe *p) 405 416 { 406 417 struct optimized_kprobe *op; 407 418 ··· 417 428 * Return an optimized kprobe whose optimizing code replaces 418 429 * instructions including addr (exclude breakpoint). 419 430 */ 420 - static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) 431 + static struct kprobe *get_optimized_kprobe(unsigned long addr) 421 432 { 422 433 int i; 423 434 struct kprobe *p = NULL; ··· 449 460 * Optimize (replace a breakpoint with a jump) kprobes listed on 450 461 * optimizing_list. 451 462 */ 452 - static __kprobes void do_optimize_kprobes(void) 463 + static void do_optimize_kprobes(void) 453 464 { 454 465 /* Optimization never be done when disarmed */ 455 466 if (kprobes_all_disarmed || !kprobes_allow_optimization || ··· 477 488 * Unoptimize (replace a jump with a breakpoint and remove the breakpoint 478 489 * if need) kprobes listed on unoptimizing_list. 
479 490 */ 480 - static __kprobes void do_unoptimize_kprobes(void) 491 + static void do_unoptimize_kprobes(void) 481 492 { 482 493 struct optimized_kprobe *op, *tmp; 483 494 ··· 509 520 } 510 521 511 522 /* Reclaim all kprobes on the free_list */ 512 - static __kprobes void do_free_cleaned_kprobes(void) 523 + static void do_free_cleaned_kprobes(void) 513 524 { 514 525 struct optimized_kprobe *op, *tmp; 515 526 ··· 521 532 } 522 533 523 534 /* Start optimizer after OPTIMIZE_DELAY passed */ 524 - static __kprobes void kick_kprobe_optimizer(void) 535 + static void kick_kprobe_optimizer(void) 525 536 { 526 537 schedule_delayed_work(&optimizing_work, OPTIMIZE_DELAY); 527 538 } 528 539 529 540 /* Kprobe jump optimizer */ 530 - static __kprobes void kprobe_optimizer(struct work_struct *work) 541 + static void kprobe_optimizer(struct work_struct *work) 531 542 { 532 543 mutex_lock(&kprobe_mutex); 533 544 /* Lock modules while optimizing kprobes */ ··· 563 574 } 564 575 565 576 /* Wait for completing optimization and unoptimization */ 566 - static __kprobes void wait_for_kprobe_optimizer(void) 577 + static void wait_for_kprobe_optimizer(void) 567 578 { 568 579 mutex_lock(&kprobe_mutex); 569 580 ··· 582 593 } 583 594 584 595 /* Optimize kprobe if p is ready to be optimized */ 585 - static __kprobes void optimize_kprobe(struct kprobe *p) 596 + static void optimize_kprobe(struct kprobe *p) 586 597 { 587 598 struct optimized_kprobe *op; 588 599 ··· 616 627 } 617 628 618 629 /* Short cut to direct unoptimizing */ 619 - static __kprobes void force_unoptimize_kprobe(struct optimized_kprobe *op) 630 + static void force_unoptimize_kprobe(struct optimized_kprobe *op) 620 631 { 621 632 get_online_cpus(); 622 633 arch_unoptimize_kprobe(op); ··· 626 637 } 627 638 628 639 /* Unoptimize a kprobe if p is optimized */ 629 - static __kprobes void unoptimize_kprobe(struct kprobe *p, bool force) 640 + static void unoptimize_kprobe(struct kprobe *p, bool force) 630 641 { 631 642 struct 
optimized_kprobe *op; 632 643 ··· 686 697 } 687 698 688 699 /* Remove optimized instructions */ 689 - static void __kprobes kill_optimized_kprobe(struct kprobe *p) 700 + static void kill_optimized_kprobe(struct kprobe *p) 690 701 { 691 702 struct optimized_kprobe *op; 692 703 ··· 712 723 } 713 724 714 725 /* Try to prepare optimized instructions */ 715 - static __kprobes void prepare_optimized_kprobe(struct kprobe *p) 726 + static void prepare_optimized_kprobe(struct kprobe *p) 716 727 { 717 728 struct optimized_kprobe *op; 718 729 ··· 721 732 } 722 733 723 734 /* Allocate new optimized_kprobe and try to prepare optimized instructions */ 724 - static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) 735 + static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) 725 736 { 726 737 struct optimized_kprobe *op; 727 738 ··· 736 747 return &op->kp; 737 748 } 738 749 739 - static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p); 750 + static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p); 740 751 741 752 /* 742 753 * Prepare an optimized_kprobe and optimize it 743 754 * NOTE: p must be a normal registered kprobe 744 755 */ 745 - static __kprobes void try_to_optimize_kprobe(struct kprobe *p) 756 + static void try_to_optimize_kprobe(struct kprobe *p) 746 757 { 747 758 struct kprobe *ap; 748 759 struct optimized_kprobe *op; ··· 776 787 } 777 788 778 789 #ifdef CONFIG_SYSCTL 779 - static void __kprobes optimize_all_kprobes(void) 790 + static void optimize_all_kprobes(void) 780 791 { 781 792 struct hlist_head *head; 782 793 struct kprobe *p; ··· 799 810 mutex_unlock(&kprobe_mutex); 800 811 } 801 812 802 - static void __kprobes unoptimize_all_kprobes(void) 813 + static void unoptimize_all_kprobes(void) 803 814 { 804 815 struct hlist_head *head; 805 816 struct kprobe *p; ··· 850 861 #endif /* CONFIG_SYSCTL */ 851 862 852 863 /* Put a breakpoint for a probe. 
Must be called with text_mutex locked */ 853 - static void __kprobes __arm_kprobe(struct kprobe *p) 864 + static void __arm_kprobe(struct kprobe *p) 854 865 { 855 866 struct kprobe *_p; 856 867 ··· 865 876 } 866 877 867 878 /* Remove the breakpoint of a probe. Must be called with text_mutex locked */ 868 - static void __kprobes __disarm_kprobe(struct kprobe *p, bool reopt) 879 + static void __disarm_kprobe(struct kprobe *p, bool reopt) 869 880 { 870 881 struct kprobe *_p; 871 882 ··· 900 911 BUG_ON(kprobe_unused(ap)); 901 912 } 902 913 903 - static __kprobes void free_aggr_kprobe(struct kprobe *p) 914 + static void free_aggr_kprobe(struct kprobe *p) 904 915 { 905 916 arch_remove_kprobe(p); 906 917 kfree(p); 907 918 } 908 919 909 - static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p) 920 + static struct kprobe *alloc_aggr_kprobe(struct kprobe *p) 910 921 { 911 922 return kzalloc(sizeof(struct kprobe), GFP_KERNEL); 912 923 } ··· 920 931 static int kprobe_ftrace_enabled; 921 932 922 933 /* Must ensure p->addr is really on ftrace */ 923 - static int __kprobes prepare_kprobe(struct kprobe *p) 934 + static int prepare_kprobe(struct kprobe *p) 924 935 { 925 936 if (!kprobe_ftrace(p)) 926 937 return arch_prepare_kprobe(p); ··· 929 940 } 930 941 931 942 /* Caller must lock kprobe_mutex */ 932 - static void __kprobes arm_kprobe_ftrace(struct kprobe *p) 943 + static void arm_kprobe_ftrace(struct kprobe *p) 933 944 { 934 945 int ret; 935 946 ··· 944 955 } 945 956 946 957 /* Caller must lock kprobe_mutex */ 947 - static void __kprobes disarm_kprobe_ftrace(struct kprobe *p) 958 + static void disarm_kprobe_ftrace(struct kprobe *p) 948 959 { 949 960 int ret; 950 961 ··· 964 975 #endif 965 976 966 977 /* Arm a kprobe with text_mutex */ 967 - static void __kprobes arm_kprobe(struct kprobe *kp) 978 + static void arm_kprobe(struct kprobe *kp) 968 979 { 969 980 if (unlikely(kprobe_ftrace(kp))) { 970 981 arm_kprobe_ftrace(kp); ··· 981 992 } 982 993 983 994 /* Disarm a 
kprobe with text_mutex */ 984 - static void __kprobes disarm_kprobe(struct kprobe *kp, bool reopt) 995 + static void disarm_kprobe(struct kprobe *kp, bool reopt) 985 996 { 986 997 if (unlikely(kprobe_ftrace(kp))) { 987 998 disarm_kprobe_ftrace(kp); ··· 997 1008 * Aggregate handlers for multiple kprobes support - these handlers 998 1009 * take care of invoking the individual kprobe handlers on p->list 999 1010 */ 1000 - static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) 1011 + static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) 1001 1012 { 1002 1013 struct kprobe *kp; 1003 1014 ··· 1011 1022 } 1012 1023 return 0; 1013 1024 } 1025 + NOKPROBE_SYMBOL(aggr_pre_handler); 1014 1026 1015 - static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, 1016 - unsigned long flags) 1027 + static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, 1028 + unsigned long flags) 1017 1029 { 1018 1030 struct kprobe *kp; 1019 1031 ··· 1026 1036 } 1027 1037 } 1028 1038 } 1039 + NOKPROBE_SYMBOL(aggr_post_handler); 1029 1040 1030 - static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, 1031 - int trapnr) 1041 + static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, 1042 + int trapnr) 1032 1043 { 1033 1044 struct kprobe *cur = __this_cpu_read(kprobe_instance); 1034 1045 ··· 1043 1052 } 1044 1053 return 0; 1045 1054 } 1055 + NOKPROBE_SYMBOL(aggr_fault_handler); 1046 1056 1047 - static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) 1057 + static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs) 1048 1058 { 1049 1059 struct kprobe *cur = __this_cpu_read(kprobe_instance); 1050 1060 int ret = 0; ··· 1057 1065 reset_kprobe_instance(); 1058 1066 return ret; 1059 1067 } 1068 + NOKPROBE_SYMBOL(aggr_break_handler); 1060 1069 1061 1070 /* Walks the list and increments nmissed count for multiprobe case */ 1062 - void __kprobes 
kprobes_inc_nmissed_count(struct kprobe *p) 1071 + void kprobes_inc_nmissed_count(struct kprobe *p) 1063 1072 { 1064 1073 struct kprobe *kp; 1065 1074 if (!kprobe_aggrprobe(p)) { ··· 1071 1078 } 1072 1079 return; 1073 1080 } 1081 + NOKPROBE_SYMBOL(kprobes_inc_nmissed_count); 1074 1082 1075 - void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, 1076 - struct hlist_head *head) 1083 + void recycle_rp_inst(struct kretprobe_instance *ri, 1084 + struct hlist_head *head) 1077 1085 { 1078 1086 struct kretprobe *rp = ri->rp; 1079 1087 ··· 1089 1095 /* Unregistering */ 1090 1096 hlist_add_head(&ri->hlist, head); 1091 1097 } 1098 + NOKPROBE_SYMBOL(recycle_rp_inst); 1092 1099 1093 - void __kprobes kretprobe_hash_lock(struct task_struct *tsk, 1100 + void kretprobe_hash_lock(struct task_struct *tsk, 1094 1101 struct hlist_head **head, unsigned long *flags) 1095 1102 __acquires(hlist_lock) 1096 1103 { ··· 1102 1107 hlist_lock = kretprobe_table_lock_ptr(hash); 1103 1108 raw_spin_lock_irqsave(hlist_lock, *flags); 1104 1109 } 1110 + NOKPROBE_SYMBOL(kretprobe_hash_lock); 1105 1111 1106 - static void __kprobes kretprobe_table_lock(unsigned long hash, 1107 - unsigned long *flags) 1112 + static void kretprobe_table_lock(unsigned long hash, 1113 + unsigned long *flags) 1108 1114 __acquires(hlist_lock) 1109 1115 { 1110 1116 raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 1111 1117 raw_spin_lock_irqsave(hlist_lock, *flags); 1112 1118 } 1119 + NOKPROBE_SYMBOL(kretprobe_table_lock); 1113 1120 1114 - void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, 1115 - unsigned long *flags) 1121 + void kretprobe_hash_unlock(struct task_struct *tsk, 1122 + unsigned long *flags) 1116 1123 __releases(hlist_lock) 1117 1124 { 1118 1125 unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); ··· 1123 1126 hlist_lock = kretprobe_table_lock_ptr(hash); 1124 1127 raw_spin_unlock_irqrestore(hlist_lock, *flags); 1125 1128 } 1129 + NOKPROBE_SYMBOL(kretprobe_hash_unlock); 1126 1130 1127 
- static void __kprobes kretprobe_table_unlock(unsigned long hash, 1128 - unsigned long *flags) 1131 + static void kretprobe_table_unlock(unsigned long hash, 1132 + unsigned long *flags) 1129 1133 __releases(hlist_lock) 1130 1134 { 1131 1135 raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); 1132 1136 raw_spin_unlock_irqrestore(hlist_lock, *flags); 1133 1137 } 1138 + NOKPROBE_SYMBOL(kretprobe_table_unlock); 1134 1139 1135 1140 /* 1136 1141 * This function is called from finish_task_switch when task tk becomes dead, ··· 1140 1141 * with this task. These left over instances represent probed functions 1141 1142 * that have been called but will never return. 1142 1143 */ 1143 - void __kprobes kprobe_flush_task(struct task_struct *tk) 1144 + void kprobe_flush_task(struct task_struct *tk) 1144 1145 { 1145 1146 struct kretprobe_instance *ri; 1146 1147 struct hlist_head *head, empty_rp; ··· 1165 1166 kfree(ri); 1166 1167 } 1167 1168 } 1169 + NOKPROBE_SYMBOL(kprobe_flush_task); 1168 1170 1169 1171 static inline void free_rp_inst(struct kretprobe *rp) 1170 1172 { ··· 1178 1178 } 1179 1179 } 1180 1180 1181 - static void __kprobes cleanup_rp_inst(struct kretprobe *rp) 1181 + static void cleanup_rp_inst(struct kretprobe *rp) 1182 1182 { 1183 1183 unsigned long flags, hash; 1184 1184 struct kretprobe_instance *ri; ··· 1197 1197 } 1198 1198 free_rp_inst(rp); 1199 1199 } 1200 + NOKPROBE_SYMBOL(cleanup_rp_inst); 1200 1201 1201 1202 /* 1202 1203 * Add the new probe to ap->list. Fail if this is the 1203 1204 * second jprobe at the address - two jprobes can't coexist 1204 1205 */ 1205 - static int __kprobes add_new_kprobe(struct kprobe *ap, struct kprobe *p) 1206 + static int add_new_kprobe(struct kprobe *ap, struct kprobe *p) 1206 1207 { 1207 1208 BUG_ON(kprobe_gone(ap) || kprobe_gone(p)); 1208 1209 ··· 1227 1226 * Fill in the required fields of the "manager kprobe". 
Replace the 1228 1227 * earlier kprobe in the hlist with the manager kprobe 1229 1228 */ 1230 - static void __kprobes init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) 1229 + static void init_aggr_kprobe(struct kprobe *ap, struct kprobe *p) 1231 1230 { 1232 1231 /* Copy p's insn slot to ap */ 1233 1232 copy_kprobe(p, ap); ··· 1253 1252 * This is the second or subsequent kprobe at the address - handle 1254 1253 * the intricacies 1255 1254 */ 1256 - static int __kprobes register_aggr_kprobe(struct kprobe *orig_p, 1257 - struct kprobe *p) 1255 + static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p) 1258 1256 { 1259 1257 int ret = 0; 1260 1258 struct kprobe *ap = orig_p; ··· 1324 1324 return ret; 1325 1325 } 1326 1326 1327 - static int __kprobes in_kprobes_functions(unsigned long addr) 1327 + bool __weak arch_within_kprobe_blacklist(unsigned long addr) 1328 1328 { 1329 - struct kprobe_blackpoint *kb; 1329 + /* The __kprobes marked functions and entry code must not be probed */ 1330 + return addr >= (unsigned long)__kprobes_text_start && 1331 + addr < (unsigned long)__kprobes_text_end; 1332 + } 1330 1333 1331 - if (addr >= (unsigned long)__kprobes_text_start && 1332 - addr < (unsigned long)__kprobes_text_end) 1333 - return -EINVAL; 1334 + static bool within_kprobe_blacklist(unsigned long addr) 1335 + { 1336 + struct kprobe_blacklist_entry *ent; 1337 + 1338 + if (arch_within_kprobe_blacklist(addr)) 1339 + return true; 1334 1340 /* 1335 1341 * If there exists a kprobe_blacklist, verify and 1336 1342 * fail any probe registration in the prohibited area 1337 1343 */ 1338 - for (kb = kprobe_blacklist; kb->name != NULL; kb++) { 1339 - if (kb->start_addr) { 1340 - if (addr >= kb->start_addr && 1341 - addr < (kb->start_addr + kb->range)) 1342 - return -EINVAL; 1343 - } 1344 + list_for_each_entry(ent, &kprobe_blacklist, list) { 1345 + if (addr >= ent->start_addr && addr < ent->end_addr) 1346 + return true; 1344 1347 } 1345 - return 0; 1348 + 1349 + return 
false; 1346 1350 } 1347 1351 1348 1352 /* ··· 1355 1351 * This returns encoded errors if it fails to look up symbol or invalid 1356 1352 * combination of parameters. 1357 1353 */ 1358 - static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p) 1354 + static kprobe_opcode_t *kprobe_addr(struct kprobe *p) 1359 1355 { 1360 1356 kprobe_opcode_t *addr = p->addr; 1361 1357 ··· 1378 1374 } 1379 1375 1380 1376 /* Check passed kprobe is valid and return kprobe in kprobe_table. */ 1381 - static struct kprobe * __kprobes __get_valid_kprobe(struct kprobe *p) 1377 + static struct kprobe *__get_valid_kprobe(struct kprobe *p) 1382 1378 { 1383 1379 struct kprobe *ap, *list_p; 1384 1380 ··· 1410 1406 return ret; 1411 1407 } 1412 1408 1413 - static __kprobes int check_kprobe_address_safe(struct kprobe *p, 1414 - struct module **probed_mod) 1409 + static int check_kprobe_address_safe(struct kprobe *p, 1410 + struct module **probed_mod) 1415 1411 { 1416 1412 int ret = 0; 1417 1413 unsigned long ftrace_addr; ··· 1437 1433 1438 1434 /* Ensure it is not in reserved area nor out of text */ 1439 1435 if (!kernel_text_address((unsigned long) p->addr) || 1440 - in_kprobes_functions((unsigned long) p->addr) || 1436 + within_kprobe_blacklist((unsigned long) p->addr) || 1441 1437 jump_label_text_reserved(p->addr, p->addr)) { 1442 1438 ret = -EINVAL; 1443 1439 goto out; ··· 1473 1469 return ret; 1474 1470 } 1475 1471 1476 - int __kprobes register_kprobe(struct kprobe *p) 1472 + int register_kprobe(struct kprobe *p) 1477 1473 { 1478 1474 int ret; 1479 1475 struct kprobe *old_p; ··· 1535 1531 EXPORT_SYMBOL_GPL(register_kprobe); 1536 1532 1537 1533 /* Check if all probes on the aggrprobe are disabled */ 1538 - static int __kprobes aggr_kprobe_disabled(struct kprobe *ap) 1534 + static int aggr_kprobe_disabled(struct kprobe *ap) 1539 1535 { 1540 1536 struct kprobe *kp; 1541 1537 ··· 1551 1547 } 1552 1548 1553 1549 /* Disable one kprobe: Make sure called under kprobe_mutex is locked */ 1554 - 
static struct kprobe *__kprobes __disable_kprobe(struct kprobe *p) 1550 + static struct kprobe *__disable_kprobe(struct kprobe *p) 1555 1551 { 1556 1552 struct kprobe *orig_p; 1557 1553 ··· 1578 1574 /* 1579 1575 * Unregister a kprobe without a scheduler synchronization. 1580 1576 */ 1581 - static int __kprobes __unregister_kprobe_top(struct kprobe *p) 1577 + static int __unregister_kprobe_top(struct kprobe *p) 1582 1578 { 1583 1579 struct kprobe *ap, *list_p; 1584 1580 ··· 1635 1631 return 0; 1636 1632 } 1637 1633 1638 - static void __kprobes __unregister_kprobe_bottom(struct kprobe *p) 1634 + static void __unregister_kprobe_bottom(struct kprobe *p) 1639 1635 { 1640 1636 struct kprobe *ap; 1641 1637 ··· 1651 1647 /* Otherwise, do nothing. */ 1652 1648 } 1653 1649 1654 - int __kprobes register_kprobes(struct kprobe **kps, int num) 1650 + int register_kprobes(struct kprobe **kps, int num) 1655 1651 { 1656 1652 int i, ret = 0; 1657 1653 ··· 1669 1665 } 1670 1666 EXPORT_SYMBOL_GPL(register_kprobes); 1671 1667 1672 - void __kprobes unregister_kprobe(struct kprobe *p) 1668 + void unregister_kprobe(struct kprobe *p) 1673 1669 { 1674 1670 unregister_kprobes(&p, 1); 1675 1671 } 1676 1672 EXPORT_SYMBOL_GPL(unregister_kprobe); 1677 1673 1678 - void __kprobes unregister_kprobes(struct kprobe **kps, int num) 1674 + void unregister_kprobes(struct kprobe **kps, int num) 1679 1675 { 1680 1676 int i; 1681 1677 ··· 1704 1700 return (unsigned long)entry; 1705 1701 } 1706 1702 1707 - int __kprobes register_jprobes(struct jprobe **jps, int num) 1703 + int register_jprobes(struct jprobe **jps, int num) 1708 1704 { 1709 1705 struct jprobe *jp; 1710 1706 int ret = 0, i; ··· 1735 1731 } 1736 1732 EXPORT_SYMBOL_GPL(register_jprobes); 1737 1733 1738 - int __kprobes register_jprobe(struct jprobe *jp) 1734 + int register_jprobe(struct jprobe *jp) 1739 1735 { 1740 1736 return register_jprobes(&jp, 1); 1741 1737 } 1742 1738 EXPORT_SYMBOL_GPL(register_jprobe); 1743 1739 1744 - void __kprobes 
unregister_jprobe(struct jprobe *jp) 1740 + void unregister_jprobe(struct jprobe *jp) 1745 1741 { 1746 1742 unregister_jprobes(&jp, 1); 1747 1743 } 1748 1744 EXPORT_SYMBOL_GPL(unregister_jprobe); 1749 1745 1750 - void __kprobes unregister_jprobes(struct jprobe **jps, int num) 1746 + void unregister_jprobes(struct jprobe **jps, int num) 1751 1747 { 1752 1748 int i; 1753 1749 ··· 1772 1768 * This kprobe pre_handler is registered with every kretprobe. When probe 1773 1769 * hits it will set up the return probe. 1774 1770 */ 1775 - static int __kprobes pre_handler_kretprobe(struct kprobe *p, 1776 - struct pt_regs *regs) 1771 + static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) 1777 1772 { 1778 1773 struct kretprobe *rp = container_of(p, struct kretprobe, kp); 1779 1774 unsigned long hash, flags = 0; ··· 1810 1807 } 1811 1808 return 0; 1812 1809 } 1810 + NOKPROBE_SYMBOL(pre_handler_kretprobe); 1813 1811 1814 - int __kprobes register_kretprobe(struct kretprobe *rp) 1812 + int register_kretprobe(struct kretprobe *rp) 1815 1813 { 1816 1814 int ret = 0; 1817 1815 struct kretprobe_instance *inst; ··· 1865 1861 } 1866 1862 EXPORT_SYMBOL_GPL(register_kretprobe); 1867 1863 1868 - int __kprobes register_kretprobes(struct kretprobe **rps, int num) 1864 + int register_kretprobes(struct kretprobe **rps, int num) 1869 1865 { 1870 1866 int ret = 0, i; 1871 1867 ··· 1883 1879 } 1884 1880 EXPORT_SYMBOL_GPL(register_kretprobes); 1885 1881 1886 - void __kprobes unregister_kretprobe(struct kretprobe *rp) 1882 + void unregister_kretprobe(struct kretprobe *rp) 1887 1883 { 1888 1884 unregister_kretprobes(&rp, 1); 1889 1885 } 1890 1886 EXPORT_SYMBOL_GPL(unregister_kretprobe); 1891 1887 1892 - void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) 1888 + void unregister_kretprobes(struct kretprobe **rps, int num) 1893 1889 { 1894 1890 int i; 1895 1891 ··· 1912 1908 EXPORT_SYMBOL_GPL(unregister_kretprobes); 1913 1909 1914 1910 #else /* CONFIG_KRETPROBES */ 
1915 - int __kprobes register_kretprobe(struct kretprobe *rp) 1911 + int register_kretprobe(struct kretprobe *rp) 1916 1912 { 1917 1913 return -ENOSYS; 1918 1914 } 1919 1915 EXPORT_SYMBOL_GPL(register_kretprobe); 1920 1916 1921 - int __kprobes register_kretprobes(struct kretprobe **rps, int num) 1917 + int register_kretprobes(struct kretprobe **rps, int num) 1922 1918 { 1923 1919 return -ENOSYS; 1924 1920 } 1925 1921 EXPORT_SYMBOL_GPL(register_kretprobes); 1926 1922 1927 - void __kprobes unregister_kretprobe(struct kretprobe *rp) 1923 + void unregister_kretprobe(struct kretprobe *rp) 1928 1924 { 1929 1925 } 1930 1926 EXPORT_SYMBOL_GPL(unregister_kretprobe); 1931 1927 1932 - void __kprobes unregister_kretprobes(struct kretprobe **rps, int num) 1928 + void unregister_kretprobes(struct kretprobe **rps, int num) 1933 1929 { 1934 1930 } 1935 1931 EXPORT_SYMBOL_GPL(unregister_kretprobes); 1936 1932 1937 - static int __kprobes pre_handler_kretprobe(struct kprobe *p, 1938 - struct pt_regs *regs) 1933 + static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) 1939 1934 { 1940 1935 return 0; 1941 1936 } 1937 + NOKPROBE_SYMBOL(pre_handler_kretprobe); 1942 1938 1943 1939 #endif /* CONFIG_KRETPROBES */ 1944 1940 1945 1941 /* Set the kprobe gone and remove its instruction buffer. 
*/ 1946 - static void __kprobes kill_kprobe(struct kprobe *p) 1942 + static void kill_kprobe(struct kprobe *p) 1947 1943 { 1948 1944 struct kprobe *kp; 1949 1945 ··· 1967 1963 } 1968 1964 1969 1965 /* Disable one kprobe */ 1970 - int __kprobes disable_kprobe(struct kprobe *kp) 1966 + int disable_kprobe(struct kprobe *kp) 1971 1967 { 1972 1968 int ret = 0; 1973 1969 ··· 1983 1979 EXPORT_SYMBOL_GPL(disable_kprobe); 1984 1980 1985 1981 /* Enable one kprobe */ 1986 - int __kprobes enable_kprobe(struct kprobe *kp) 1982 + int enable_kprobe(struct kprobe *kp) 1987 1983 { 1988 1984 int ret = 0; 1989 1985 struct kprobe *p; ··· 2016 2012 } 2017 2013 EXPORT_SYMBOL_GPL(enable_kprobe); 2018 2014 2019 - void __kprobes dump_kprobe(struct kprobe *kp) 2015 + void dump_kprobe(struct kprobe *kp) 2020 2016 { 2021 2017 printk(KERN_WARNING "Dumping kprobe:\n"); 2022 2018 printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n", 2023 2019 kp->symbol_name, kp->addr, kp->offset); 2024 2020 } 2021 + NOKPROBE_SYMBOL(dump_kprobe); 2022 + 2023 + /* 2024 + * Lookup and populate the kprobe_blacklist. 2025 + * 2026 + * Unlike the kretprobe blacklist, we'll need to determine 2027 + * the range of addresses that belong to the said functions, 2028 + * since a kprobe need not necessarily be at the beginning 2029 + * of a function. 
2030 + */ 2031 + static int __init populate_kprobe_blacklist(unsigned long *start, 2032 + unsigned long *end) 2033 + { 2034 + unsigned long *iter; 2035 + struct kprobe_blacklist_entry *ent; 2036 + unsigned long offset = 0, size = 0; 2037 + 2038 + for (iter = start; iter < end; iter++) { 2039 + if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) { 2040 + pr_err("Failed to find blacklist %p\n", (void *)*iter); 2041 + continue; 2042 + } 2043 + 2044 + ent = kmalloc(sizeof(*ent), GFP_KERNEL); 2045 + if (!ent) 2046 + return -ENOMEM; 2047 + ent->start_addr = *iter; 2048 + ent->end_addr = *iter + size; 2049 + INIT_LIST_HEAD(&ent->list); 2050 + list_add_tail(&ent->list, &kprobe_blacklist); 2051 + } 2052 + return 0; 2053 + } 2025 2054 2026 2055 /* Module notifier call back, checking kprobes on the module */ 2027 - static int __kprobes kprobes_module_callback(struct notifier_block *nb, 2028 - unsigned long val, void *data) 2056 + static int kprobes_module_callback(struct notifier_block *nb, 2057 + unsigned long val, void *data) 2029 2058 { 2030 2059 struct module *mod = data; 2031 2060 struct hlist_head *head; ··· 2099 2062 .priority = 0 2100 2063 }; 2101 2064 2065 + /* Markers of _kprobe_blacklist section */ 2066 + extern unsigned long __start_kprobe_blacklist[]; 2067 + extern unsigned long __stop_kprobe_blacklist[]; 2068 + 2102 2069 static int __init init_kprobes(void) 2103 2070 { 2104 2071 int i, err = 0; 2105 - unsigned long offset = 0, size = 0; 2106 - char *modname, namebuf[KSYM_NAME_LEN]; 2107 - const char *symbol_name; 2108 - void *addr; 2109 - struct kprobe_blackpoint *kb; 2110 2072 2111 2073 /* FIXME allocate the probe table, currently defined statically */ 2112 2074 /* initialize all list heads */ ··· 2115 2079 raw_spin_lock_init(&(kretprobe_table_locks[i].lock)); 2116 2080 } 2117 2081 2118 - /* 2119 - * Lookup and populate the kprobe_blacklist. 
2120 - * 2121 - * Unlike the kretprobe blacklist, we'll need to determine 2122 - * the range of addresses that belong to the said functions, 2123 - * since a kprobe need not necessarily be at the beginning 2124 - * of a function. 2125 - */ 2126 - for (kb = kprobe_blacklist; kb->name != NULL; kb++) { 2127 - kprobe_lookup_name(kb->name, addr); 2128 - if (!addr) 2129 - continue; 2130 - 2131 - kb->start_addr = (unsigned long)addr; 2132 - symbol_name = kallsyms_lookup(kb->start_addr, 2133 - &size, &offset, &modname, namebuf); 2134 - if (!symbol_name) 2135 - kb->range = 0; 2136 - else 2137 - kb->range = size; 2082 + err = populate_kprobe_blacklist(__start_kprobe_blacklist, 2083 + __stop_kprobe_blacklist); 2084 + if (err) { 2085 + pr_err("kprobes: failed to populate blacklist: %d\n", err); 2086 + pr_err("Please take care of using kprobes.\n"); 2138 2087 } 2139 2088 2140 2089 if (kretprobe_blacklist_size) { ··· 2159 2138 } 2160 2139 2161 2140 #ifdef CONFIG_DEBUG_FS 2162 - static void __kprobes report_probe(struct seq_file *pi, struct kprobe *p, 2141 + static void report_probe(struct seq_file *pi, struct kprobe *p, 2163 2142 const char *sym, int offset, char *modname, struct kprobe *pp) 2164 2143 { 2165 2144 char *kprobe_type; ··· 2188 2167 (kprobe_ftrace(pp) ? "[FTRACE]" : "")); 2189 2168 } 2190 2169 2191 - static void __kprobes *kprobe_seq_start(struct seq_file *f, loff_t *pos) 2170 + static void *kprobe_seq_start(struct seq_file *f, loff_t *pos) 2192 2171 { 2193 2172 return (*pos < KPROBE_TABLE_SIZE) ? 
pos : NULL; 2194 2173 } 2195 2174 2196 - static void __kprobes *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos) 2175 + static void *kprobe_seq_next(struct seq_file *f, void *v, loff_t *pos) 2197 2176 { 2198 2177 (*pos)++; 2199 2178 if (*pos >= KPROBE_TABLE_SIZE) ··· 2201 2180 return pos; 2202 2181 } 2203 2182 2204 - static void __kprobes kprobe_seq_stop(struct seq_file *f, void *v) 2183 + static void kprobe_seq_stop(struct seq_file *f, void *v) 2205 2184 { 2206 2185 /* Nothing to do */ 2207 2186 } 2208 2187 2209 - static int __kprobes show_kprobe_addr(struct seq_file *pi, void *v) 2188 + static int show_kprobe_addr(struct seq_file *pi, void *v) 2210 2189 { 2211 2190 struct hlist_head *head; 2212 2191 struct kprobe *p, *kp; ··· 2237 2216 .show = show_kprobe_addr 2238 2217 }; 2239 2218 2240 - static int __kprobes kprobes_open(struct inode *inode, struct file *filp) 2219 + static int kprobes_open(struct inode *inode, struct file *filp) 2241 2220 { 2242 2221 return seq_open(filp, &kprobes_seq_ops); 2243 2222 } ··· 2249 2228 .release = seq_release, 2250 2229 }; 2251 2230 2252 - static void __kprobes arm_all_kprobes(void) 2231 + /* kprobes/blacklist -- shows which functions can not be probed */ 2232 + static void *kprobe_blacklist_seq_start(struct seq_file *m, loff_t *pos) 2233 + { 2234 + return seq_list_start(&kprobe_blacklist, *pos); 2235 + } 2236 + 2237 + static void *kprobe_blacklist_seq_next(struct seq_file *m, void *v, loff_t *pos) 2238 + { 2239 + return seq_list_next(v, &kprobe_blacklist, pos); 2240 + } 2241 + 2242 + static int kprobe_blacklist_seq_show(struct seq_file *m, void *v) 2243 + { 2244 + struct kprobe_blacklist_entry *ent = 2245 + list_entry(v, struct kprobe_blacklist_entry, list); 2246 + 2247 + seq_printf(m, "0x%p-0x%p\t%ps\n", (void *)ent->start_addr, 2248 + (void *)ent->end_addr, (void *)ent->start_addr); 2249 + return 0; 2250 + } 2251 + 2252 + static const struct seq_operations kprobe_blacklist_seq_ops = { 2253 + .start = 
kprobe_blacklist_seq_start, 2254 + .next = kprobe_blacklist_seq_next, 2255 + .stop = kprobe_seq_stop, /* Reuse void function */ 2256 + .show = kprobe_blacklist_seq_show, 2257 + }; 2258 + 2259 + static int kprobe_blacklist_open(struct inode *inode, struct file *filp) 2260 + { 2261 + return seq_open(filp, &kprobe_blacklist_seq_ops); 2262 + } 2263 + 2264 + static const struct file_operations debugfs_kprobe_blacklist_ops = { 2265 + .open = kprobe_blacklist_open, 2266 + .read = seq_read, 2267 + .llseek = seq_lseek, 2268 + .release = seq_release, 2269 + }; 2270 + 2271 + static void arm_all_kprobes(void) 2253 2272 { 2254 2273 struct hlist_head *head; 2255 2274 struct kprobe *p; ··· 2317 2256 return; 2318 2257 } 2319 2258 2320 - static void __kprobes disarm_all_kprobes(void) 2259 + static void disarm_all_kprobes(void) 2321 2260 { 2322 2261 struct hlist_head *head; 2323 2262 struct kprobe *p; ··· 2401 2340 .llseek = default_llseek, 2402 2341 }; 2403 2342 2404 - static int __kprobes debugfs_kprobe_init(void) 2343 + static int __init debugfs_kprobe_init(void) 2405 2344 { 2406 2345 struct dentry *dir, *file; 2407 2346 unsigned int value = 1; ··· 2412 2351 2413 2352 file = debugfs_create_file("list", 0444, dir, NULL, 2414 2353 &debugfs_kprobes_operations); 2415 - if (!file) { 2416 - debugfs_remove(dir); 2417 - return -ENOMEM; 2418 - } 2354 + if (!file) 2355 + goto error; 2419 2356 2420 2357 file = debugfs_create_file("enabled", 0600, dir, 2421 2358 &value, &fops_kp); 2422 - if (!file) { 2423 - debugfs_remove(dir); 2424 - return -ENOMEM; 2425 - } 2359 + if (!file) 2360 + goto error; 2361 + 2362 + file = debugfs_create_file("blacklist", 0444, dir, NULL, 2363 + &debugfs_kprobe_blacklist_ops); 2364 + if (!file) 2365 + goto error; 2426 2366 2427 2367 return 0; 2368 + 2369 + error: 2370 + debugfs_remove(dir); 2371 + return -ENOMEM; 2428 2372 } 2429 2373 2430 2374 late_initcall(debugfs_kprobe_init);
+13 -9
kernel/notifier.c
··· 71 71 * @returns: notifier_call_chain returns the value returned by the 72 72 * last notifier function called. 73 73 */ 74 - static int __kprobes notifier_call_chain(struct notifier_block **nl, 75 - unsigned long val, void *v, 76 - int nr_to_call, int *nr_calls) 74 + static int notifier_call_chain(struct notifier_block **nl, 75 + unsigned long val, void *v, 76 + int nr_to_call, int *nr_calls) 77 77 { 78 78 int ret = NOTIFY_DONE; 79 79 struct notifier_block *nb, *next_nb; ··· 102 102 } 103 103 return ret; 104 104 } 105 + NOKPROBE_SYMBOL(notifier_call_chain); 105 106 106 107 /* 107 108 * Atomic notifier chain routines. Registration and unregistration ··· 173 172 * Otherwise the return value is the return value 174 173 * of the last notifier function called. 175 174 */ 176 - int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh, 177 - unsigned long val, void *v, 178 - int nr_to_call, int *nr_calls) 175 + int __atomic_notifier_call_chain(struct atomic_notifier_head *nh, 176 + unsigned long val, void *v, 177 + int nr_to_call, int *nr_calls) 179 178 { 180 179 int ret; 181 180 ··· 185 184 return ret; 186 185 } 187 186 EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain); 187 + NOKPROBE_SYMBOL(__atomic_notifier_call_chain); 188 188 189 - int __kprobes atomic_notifier_call_chain(struct atomic_notifier_head *nh, 190 - unsigned long val, void *v) 189 + int atomic_notifier_call_chain(struct atomic_notifier_head *nh, 190 + unsigned long val, void *v) 191 191 { 192 192 return __atomic_notifier_call_chain(nh, val, v, -1, NULL); 193 193 } 194 194 EXPORT_SYMBOL_GPL(atomic_notifier_call_chain); 195 + NOKPROBE_SYMBOL(atomic_notifier_call_chain); 195 196 196 197 /* 197 198 * Blocking notifier chain routines. 
All access to the chain is ··· 530 527 531 528 static ATOMIC_NOTIFIER_HEAD(die_chain); 532 529 533 - int notrace __kprobes notify_die(enum die_val val, const char *str, 530 + int notrace notify_die(enum die_val val, const char *str, 534 531 struct pt_regs *regs, long err, int trap, int sig) 535 532 { 536 533 struct die_args args = { ··· 543 540 }; 544 541 return atomic_notifier_call_chain(&die_chain, val, &args); 545 542 } 543 + NOKPROBE_SYMBOL(notify_die); 546 544 547 545 int register_die_notifier(struct notifier_block *nb) 548 546 {
+5 -2
kernel/sched/core.c
··· 2527 2527 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ 2528 2528 defined(CONFIG_PREEMPT_TRACER)) 2529 2529 2530 - void __kprobes preempt_count_add(int val) 2530 + void preempt_count_add(int val) 2531 2531 { 2532 2532 #ifdef CONFIG_DEBUG_PREEMPT 2533 2533 /* ··· 2553 2553 } 2554 2554 } 2555 2555 EXPORT_SYMBOL(preempt_count_add); 2556 + NOKPROBE_SYMBOL(preempt_count_add); 2556 2557 2557 - void __kprobes preempt_count_sub(int val) 2558 + void preempt_count_sub(int val) 2558 2559 { 2559 2560 #ifdef CONFIG_DEBUG_PREEMPT 2560 2561 /* ··· 2576 2575 __preempt_count_sub(val); 2577 2576 } 2578 2577 EXPORT_SYMBOL(preempt_count_sub); 2578 + NOKPROBE_SYMBOL(preempt_count_sub); 2579 2579 2580 2580 #endif 2581 2581 ··· 2859 2857 barrier(); 2860 2858 } while (need_resched()); 2861 2859 } 2860 + NOKPROBE_SYMBOL(preempt_schedule); 2862 2861 EXPORT_SYMBOL(preempt_schedule); 2863 2862 #endif /* CONFIG_PREEMPT */ 2864 2863
+3 -2
kernel/trace/trace_event_perf.c
··· 248 248 tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); 249 249 } 250 250 251 - __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, 252 - struct pt_regs *regs, int *rctxp) 251 + void *perf_trace_buf_prepare(int size, unsigned short type, 252 + struct pt_regs *regs, int *rctxp) 253 253 { 254 254 struct trace_entry *entry; 255 255 unsigned long flags; ··· 281 281 return raw_data; 282 282 } 283 283 EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); 284 + NOKPROBE_SYMBOL(perf_trace_buf_prepare); 284 285 285 286 #ifdef CONFIG_FUNCTION_TRACER 286 287 static void
+41 -30
kernel/trace/trace_kprobe.c
··· 40 40 (sizeof(struct probe_arg) * (n))) 41 41 42 42 43 - static __kprobes bool trace_kprobe_is_return(struct trace_kprobe *tk) 43 + static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) 44 44 { 45 45 return tk->rp.handler != NULL; 46 46 } 47 47 48 - static __kprobes const char *trace_kprobe_symbol(struct trace_kprobe *tk) 48 + static nokprobe_inline const char *trace_kprobe_symbol(struct trace_kprobe *tk) 49 49 { 50 50 return tk->symbol ? tk->symbol : "unknown"; 51 51 } 52 52 53 - static __kprobes unsigned long trace_kprobe_offset(struct trace_kprobe *tk) 53 + static nokprobe_inline unsigned long trace_kprobe_offset(struct trace_kprobe *tk) 54 54 { 55 55 return tk->rp.kp.offset; 56 56 } 57 57 58 - static __kprobes bool trace_kprobe_has_gone(struct trace_kprobe *tk) 58 + static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk) 59 59 { 60 60 return !!(kprobe_gone(&tk->rp.kp)); 61 61 } 62 62 63 - static __kprobes bool trace_kprobe_within_module(struct trace_kprobe *tk, 63 + static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk, 64 64 struct module *mod) 65 65 { 66 66 int len = strlen(mod->name); ··· 68 68 return strncmp(mod->name, name, len) == 0 && name[len] == ':'; 69 69 } 70 70 71 - static __kprobes bool trace_kprobe_is_on_module(struct trace_kprobe *tk) 71 + static nokprobe_inline bool trace_kprobe_is_on_module(struct trace_kprobe *tk) 72 72 { 73 73 return !!strchr(trace_kprobe_symbol(tk), ':'); 74 74 } ··· 132 132 * Kprobes-specific fetch functions 133 133 */ 134 134 #define DEFINE_FETCH_stack(type) \ 135 - static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ 135 + static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \ 136 136 void *offset, void *dest) \ 137 137 { \ 138 138 *(type *)dest = (type)regs_get_kernel_stack_nth(regs, \ 139 139 (unsigned int)((unsigned long)offset)); \ 140 - } 140 + } \ 141 + NOKPROBE_SYMBOL(FETCH_FUNC_NAME(stack, type)); 142 + 141 143 
DEFINE_BASIC_FETCH_FUNCS(stack) 142 144 /* No string on the stack entry */ 143 145 #define fetch_stack_string NULL 144 146 #define fetch_stack_string_size NULL 145 147 146 148 #define DEFINE_FETCH_memory(type) \ 147 - static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ 149 + static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \ 148 150 void *addr, void *dest) \ 149 151 { \ 150 152 type retval; \ ··· 154 152 *(type *)dest = 0; \ 155 153 else \ 156 154 *(type *)dest = retval; \ 157 - } 155 + } \ 156 + NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, type)); 157 + 158 158 DEFINE_BASIC_FETCH_FUNCS(memory) 159 159 /* 160 160 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max 161 161 * length and relative data location. 162 162 */ 163 - static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, 164 - void *addr, void *dest) 163 + static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, 164 + void *addr, void *dest) 165 165 { 166 166 long ret; 167 167 int maxlen = get_rloc_len(*(u32 *)dest); ··· 197 193 get_rloc_offs(*(u32 *)dest)); 198 194 } 199 195 } 196 + NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string)); 200 197 201 198 /* Return the length of string -- including null terminal byte */ 202 - static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, 203 - void *addr, void *dest) 199 + static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, 200 + void *addr, void *dest) 204 201 { 205 202 mm_segment_t old_fs; 206 203 int ret, len = 0; ··· 224 219 else 225 220 *(u32 *)dest = len; 226 221 } 222 + NOKPROBE_SYMBOL(FETCH_FUNC_NAME(memory, string_size)); 227 223 228 224 #define DEFINE_FETCH_symbol(type) \ 229 - __kprobes void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, \ 230 - void *data, void *dest) \ 225 + void FETCH_FUNC_NAME(symbol, type)(struct pt_regs *regs, void *data, void *dest)\ 231 226 { \ 232 227 struct symbol_cache *sc = data; \ 233 228 if 
(sc->addr) \ 234 229 fetch_memory_##type(regs, (void *)sc->addr, dest); \ 235 230 else \ 236 231 *(type *)dest = 0; \ 237 - } 232 + } \ 233 + NOKPROBE_SYMBOL(FETCH_FUNC_NAME(symbol, type)); 234 + 238 235 DEFINE_BASIC_FETCH_FUNCS(symbol) 239 236 DEFINE_FETCH_symbol(string) 240 237 DEFINE_FETCH_symbol(string_size) ··· 914 907 }; 915 908 916 909 /* Kprobe handler */ 917 - static __kprobes void 910 + static nokprobe_inline void 918 911 __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs, 919 912 struct ftrace_event_file *ftrace_file) 920 913 { ··· 950 943 entry, irq_flags, pc, regs); 951 944 } 952 945 953 - static __kprobes void 946 + static void 954 947 kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs) 955 948 { 956 949 struct event_file_link *link; ··· 958 951 list_for_each_entry_rcu(link, &tk->tp.files, list) 959 952 __kprobe_trace_func(tk, regs, link->file); 960 953 } 954 + NOKPROBE_SYMBOL(kprobe_trace_func); 961 955 962 956 /* Kretprobe handler */ 963 - static __kprobes void 957 + static nokprobe_inline void 964 958 __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, 965 959 struct pt_regs *regs, 966 960 struct ftrace_event_file *ftrace_file) ··· 999 991 entry, irq_flags, pc, regs); 1000 992 } 1001 993 1002 - static __kprobes void 994 + static void 1003 995 kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, 1004 996 struct pt_regs *regs) 1005 997 { ··· 1008 1000 list_for_each_entry_rcu(link, &tk->tp.files, list) 1009 1001 __kretprobe_trace_func(tk, ri, regs, link->file); 1010 1002 } 1003 + NOKPROBE_SYMBOL(kretprobe_trace_func); 1011 1004 1012 1005 /* Event entry printers */ 1013 1006 static enum print_line_t ··· 1140 1131 #ifdef CONFIG_PERF_EVENTS 1141 1132 1142 1133 /* Kprobe profile handler */ 1143 - static __kprobes void 1134 + static void 1144 1135 kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) 1145 1136 { 1146 1137 struct ftrace_event_call *call = &tk->tp.call; 
··· 1167 1158 store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); 1168 1159 perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); 1169 1160 } 1161 + NOKPROBE_SYMBOL(kprobe_perf_func); 1170 1162 1171 1163 /* Kretprobe profile handler */ 1172 - static __kprobes void 1164 + static void 1173 1165 kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, 1174 1166 struct pt_regs *regs) 1175 1167 { ··· 1198 1188 store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); 1199 1189 perf_trace_buf_submit(entry, size, rctx, 0, 1, regs, head, NULL); 1200 1190 } 1191 + NOKPROBE_SYMBOL(kretprobe_perf_func); 1201 1192 #endif /* CONFIG_PERF_EVENTS */ 1202 1193 1203 1194 /* ··· 1207 1196 * kprobe_trace_self_tests_init() does enable_trace_probe/disable_trace_probe 1208 1197 * lockless, but we can't race with this __init function. 1209 1198 */ 1210 - static __kprobes 1211 - int kprobe_register(struct ftrace_event_call *event, 1212 - enum trace_reg type, void *data) 1199 + static int kprobe_register(struct ftrace_event_call *event, 1200 + enum trace_reg type, void *data) 1213 1201 { 1214 1202 struct trace_kprobe *tk = (struct trace_kprobe *)event->data; 1215 1203 struct ftrace_event_file *file = data; ··· 1234 1224 return 0; 1235 1225 } 1236 1226 1237 - static __kprobes 1238 - int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) 1227 + static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) 1239 1228 { 1240 1229 struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); 1241 1230 ··· 1248 1239 #endif 1249 1240 return 0; /* We don't tweek kernel, so just return 0 */ 1250 1241 } 1242 + NOKPROBE_SYMBOL(kprobe_dispatcher); 1251 1243 1252 - static __kprobes 1253 - int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) 1244 + static int 1245 + kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs) 1254 1246 { 1255 1247 struct trace_kprobe *tk = 
container_of(ri->rp, struct trace_kprobe, rp); 1256 1248 ··· 1265 1255 #endif 1266 1256 return 0; /* We don't tweek kernel, so just return 0 */ 1267 1257 } 1258 + NOKPROBE_SYMBOL(kretprobe_dispatcher); 1268 1259 1269 1260 static struct trace_event_functions kretprobe_funcs = { 1270 1261 .trace = print_kretprobe_event
+35 -30
kernel/trace/trace_probe.c
··· 37 37 38 38 /* Printing in basic type function template */ 39 39 #define DEFINE_BASIC_PRINT_TYPE_FUNC(type, fmt) \ 40 - __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ 41 - const char *name, \ 42 - void *data, void *ent) \ 40 + int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \ 41 + void *data, void *ent) \ 43 42 { \ 44 43 return trace_seq_printf(s, " %s=" fmt, name, *(type *)data); \ 45 44 } \ 46 - const char PRINT_TYPE_FMT_NAME(type)[] = fmt; 45 + const char PRINT_TYPE_FMT_NAME(type)[] = fmt; \ 46 + NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(type)); 47 47 48 48 DEFINE_BASIC_PRINT_TYPE_FUNC(u8 , "0x%x") 49 49 DEFINE_BASIC_PRINT_TYPE_FUNC(u16, "0x%x") ··· 55 55 DEFINE_BASIC_PRINT_TYPE_FUNC(s64, "%Ld") 56 56 57 57 /* Print type function for string type */ 58 - __kprobes int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, 59 - const char *name, 60 - void *data, void *ent) 58 + int PRINT_TYPE_FUNC_NAME(string)(struct trace_seq *s, const char *name, 59 + void *data, void *ent) 61 60 { 62 61 int len = *(u32 *)data >> 16; 63 62 ··· 66 67 return trace_seq_printf(s, " %s=\"%s\"", name, 67 68 (const char *)get_loc_data(data, ent)); 68 69 } 70 + NOKPROBE_SYMBOL(PRINT_TYPE_FUNC_NAME(string)); 69 71 70 72 const char PRINT_TYPE_FMT_NAME(string)[] = "\\\"%s\\\""; 71 73 ··· 81 81 82 82 /* Data fetch function templates */ 83 83 #define DEFINE_FETCH_reg(type) \ 84 - __kprobes void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, \ 85 - void *offset, void *dest) \ 84 + void FETCH_FUNC_NAME(reg, type)(struct pt_regs *regs, void *offset, void *dest) \ 86 85 { \ 87 86 *(type *)dest = (type)regs_get_register(regs, \ 88 87 (unsigned int)((unsigned long)offset)); \ 89 - } 88 + } \ 89 + NOKPROBE_SYMBOL(FETCH_FUNC_NAME(reg, type)); 90 90 DEFINE_BASIC_FETCH_FUNCS(reg) 91 91 /* No string on the register */ 92 92 #define fetch_reg_string NULL 93 93 #define fetch_reg_string_size NULL 94 94 95 95 #define DEFINE_FETCH_retval(type) \ 96 - __kprobes void 
FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \ 97 - void *dummy, void *dest) \ 96 + void FETCH_FUNC_NAME(retval, type)(struct pt_regs *regs, \ 97 + void *dummy, void *dest) \ 98 98 { \ 99 99 *(type *)dest = (type)regs_return_value(regs); \ 100 - } 100 + } \ 101 + NOKPROBE_SYMBOL(FETCH_FUNC_NAME(retval, type)); 101 102 DEFINE_BASIC_FETCH_FUNCS(retval) 102 103 /* No string on the retval */ 103 104 #define fetch_retval_string NULL ··· 113 112 }; 114 113 115 114 #define DEFINE_FETCH_deref(type) \ 116 - __kprobes void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \ 117 - void *data, void *dest) \ 115 + void FETCH_FUNC_NAME(deref, type)(struct pt_regs *regs, \ 116 + void *data, void *dest) \ 118 117 { \ 119 118 struct deref_fetch_param *dprm = data; \ 120 119 unsigned long addr; \ ··· 124 123 dprm->fetch(regs, (void *)addr, dest); \ 125 124 } else \ 126 125 *(type *)dest = 0; \ 127 - } 126 + } \ 127 + NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, type)); 128 128 DEFINE_BASIC_FETCH_FUNCS(deref) 129 129 DEFINE_FETCH_deref(string) 130 130 131 - __kprobes void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs, 132 - void *data, void *dest) 131 + void FETCH_FUNC_NAME(deref, string_size)(struct pt_regs *regs, 132 + void *data, void *dest) 133 133 { 134 134 struct deref_fetch_param *dprm = data; 135 135 unsigned long addr; ··· 142 140 } else 143 141 *(string_size *)dest = 0; 144 142 } 143 + NOKPROBE_SYMBOL(FETCH_FUNC_NAME(deref, string_size)); 145 144 146 - static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data) 145 + static void update_deref_fetch_param(struct deref_fetch_param *data) 147 146 { 148 147 if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) 149 148 update_deref_fetch_param(data->orig.data); 150 149 else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn)) 151 150 update_symbol_cache(data->orig.data); 152 151 } 152 + NOKPROBE_SYMBOL(update_deref_fetch_param); 153 153 154 - static __kprobes void free_deref_fetch_param(struct deref_fetch_param 
*data) 154 + static void free_deref_fetch_param(struct deref_fetch_param *data) 155 155 { 156 156 if (CHECK_FETCH_FUNCS(deref, data->orig.fn)) 157 157 free_deref_fetch_param(data->orig.data); ··· 161 157 free_symbol_cache(data->orig.data); 162 158 kfree(data); 163 159 } 160 + NOKPROBE_SYMBOL(free_deref_fetch_param); 164 161 165 162 /* Bitfield fetch function */ 166 163 struct bitfield_fetch_param { ··· 171 166 }; 172 167 173 168 #define DEFINE_FETCH_bitfield(type) \ 174 - __kprobes void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \ 175 - void *data, void *dest) \ 169 + void FETCH_FUNC_NAME(bitfield, type)(struct pt_regs *regs, \ 170 + void *data, void *dest) \ 176 171 { \ 177 172 struct bitfield_fetch_param *bprm = data; \ 178 173 type buf = 0; \ ··· 182 177 buf >>= bprm->low_shift; \ 183 178 } \ 184 179 *(type *)dest = buf; \ 185 - } 186 - 180 + } \ 181 + NOKPROBE_SYMBOL(FETCH_FUNC_NAME(bitfield, type)); 187 182 DEFINE_BASIC_FETCH_FUNCS(bitfield) 188 183 #define fetch_bitfield_string NULL 189 184 #define fetch_bitfield_string_size NULL 190 185 191 - static __kprobes void 186 + static void 192 187 update_bitfield_fetch_param(struct bitfield_fetch_param *data) 193 188 { 194 189 /* ··· 201 196 update_symbol_cache(data->orig.data); 202 197 } 203 198 204 - static __kprobes void 199 + static void 205 200 free_bitfield_fetch_param(struct bitfield_fetch_param *data) 206 201 { 207 202 /* ··· 260 255 } 261 256 262 257 /* Special function : only accept unsigned long */ 263 - static __kprobes void fetch_kernel_stack_address(struct pt_regs *regs, 264 - void *dummy, void *dest) 258 + static void fetch_kernel_stack_address(struct pt_regs *regs, void *dummy, void *dest) 265 259 { 266 260 *(unsigned long *)dest = kernel_stack_pointer(regs); 267 261 } 262 + NOKPROBE_SYMBOL(fetch_kernel_stack_address); 268 263 269 - static __kprobes void fetch_user_stack_address(struct pt_regs *regs, 270 - void *dummy, void *dest) 264 + static void fetch_user_stack_address(struct pt_regs 
*regs, void *dummy, void *dest) 271 265 { 272 266 *(unsigned long *)dest = user_stack_pointer(regs); 273 267 } 268 + NOKPROBE_SYMBOL(fetch_user_stack_address); 274 269 275 270 static fetch_func_t get_fetch_size_function(const struct fetch_type *type, 276 271 fetch_func_t orig_fn,
+7 -8
kernel/trace/trace_probe.h
··· 81 81 */ 82 82 #define convert_rloc_to_loc(dl, offs) ((u32)(dl) + (offs)) 83 83 84 - static inline void *get_rloc_data(u32 *dl) 84 + static nokprobe_inline void *get_rloc_data(u32 *dl) 85 85 { 86 86 return (u8 *)dl + get_rloc_offs(*dl); 87 87 } 88 88 89 89 /* For data_loc conversion */ 90 - static inline void *get_loc_data(u32 *dl, void *ent) 90 + static nokprobe_inline void *get_loc_data(u32 *dl, void *ent) 91 91 { 92 92 return (u8 *)ent + get_rloc_offs(*dl); 93 93 } ··· 136 136 137 137 /* Printing in basic type function template */ 138 138 #define DECLARE_BASIC_PRINT_TYPE_FUNC(type) \ 139 - __kprobes int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, \ 140 - const char *name, \ 141 - void *data, void *ent); \ 139 + int PRINT_TYPE_FUNC_NAME(type)(struct trace_seq *s, const char *name, \ 140 + void *data, void *ent); \ 142 141 extern const char PRINT_TYPE_FMT_NAME(type)[] 143 142 144 143 DECLARE_BASIC_PRINT_TYPE_FUNC(u8); ··· 302 303 return !!(tp->flags & TP_FLAG_REGISTERED); 303 304 } 304 305 305 - static inline __kprobes void call_fetch(struct fetch_param *fprm, 306 + static nokprobe_inline void call_fetch(struct fetch_param *fprm, 306 307 struct pt_regs *regs, void *dest) 307 308 { 308 309 return fprm->fn(regs, fprm->data, dest); ··· 350 351 extern int traceprobe_command(const char *buf, int (*createfn)(int, char**)); 351 352 352 353 /* Sum up total data length for dynamic arraies (strings) */ 353 - static inline __kprobes int 354 + static nokprobe_inline int 354 355 __get_data_size(struct trace_probe *tp, struct pt_regs *regs) 355 356 { 356 357 int i, ret = 0; ··· 366 367 } 367 368 368 369 /* Store the value of each argument */ 369 - static inline __kprobes void 370 + static nokprobe_inline void 370 371 store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs, 371 372 u8 *data, int maxlen) 372 373 {
+39 -35
kernel/trace/trace_uprobe.c
··· 108 108 * Uprobes-specific fetch functions 109 109 */ 110 110 #define DEFINE_FETCH_stack(type) \ 111 - static __kprobes void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs,\ 112 - void *offset, void *dest) \ 111 + static void FETCH_FUNC_NAME(stack, type)(struct pt_regs *regs, \ 112 + void *offset, void *dest) \ 113 113 { \ 114 114 *(type *)dest = (type)get_user_stack_nth(regs, \ 115 115 ((unsigned long)offset)); \ ··· 120 120 #define fetch_stack_string_size NULL 121 121 122 122 #define DEFINE_FETCH_memory(type) \ 123 - static __kprobes void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs,\ 124 - void *addr, void *dest) \ 123 + static void FETCH_FUNC_NAME(memory, type)(struct pt_regs *regs, \ 124 + void *addr, void *dest) \ 125 125 { \ 126 126 type retval; \ 127 127 void __user *vaddr = (void __force __user *) addr; \ ··· 136 136 * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max 137 137 * length and relative data location. 138 138 */ 139 - static __kprobes void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, 140 - void *addr, void *dest) 139 + static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs, 140 + void *addr, void *dest) 141 141 { 142 142 long ret; 143 143 u32 rloc = *(u32 *)dest; ··· 158 158 } 159 159 } 160 160 161 - static __kprobes void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, 162 - void *addr, void *dest) 161 + static void FETCH_FUNC_NAME(memory, string_size)(struct pt_regs *regs, 162 + void *addr, void *dest) 163 163 { 164 164 int len; 165 165 void __user *vaddr = (void __force __user *) addr; ··· 184 184 } 185 185 186 186 #define DEFINE_FETCH_file_offset(type) \ 187 - static __kprobes void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs,\ 188 - void *offset, void *dest) \ 187 + static void FETCH_FUNC_NAME(file_offset, type)(struct pt_regs *regs, \ 188 + void *offset, void *dest)\ 189 189 { \ 190 190 void *vaddr = (void *)translate_user_vaddr(offset); \ 191 191 \ ··· 1009 1009 
return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm); 1010 1010 } 1011 1011 1012 + static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) 1013 + { 1014 + bool done; 1015 + 1016 + write_lock(&tu->filter.rwlock); 1017 + if (event->hw.tp_target) { 1018 + list_del(&event->hw.tp_list); 1019 + done = tu->filter.nr_systemwide || 1020 + (event->hw.tp_target->flags & PF_EXITING) || 1021 + uprobe_filter_event(tu, event); 1022 + } else { 1023 + tu->filter.nr_systemwide--; 1024 + done = tu->filter.nr_systemwide; 1025 + } 1026 + write_unlock(&tu->filter.rwlock); 1027 + 1028 + if (!done) 1029 + return uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); 1030 + 1031 + return 0; 1032 + } 1033 + 1012 1034 static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event) 1013 1035 { 1014 1036 bool done; 1037 + int err; 1015 1038 1016 1039 write_lock(&tu->filter.rwlock); 1017 1040 if (event->hw.tp_target) { ··· 1056 1033 } 1057 1034 write_unlock(&tu->filter.rwlock); 1058 1035 1059 - if (!done) 1060 - uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); 1061 - 1062 - return 0; 1063 - } 1064 - 1065 - static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event) 1066 - { 1067 - bool done; 1068 - 1069 - write_lock(&tu->filter.rwlock); 1070 - if (event->hw.tp_target) { 1071 - list_del(&event->hw.tp_list); 1072 - done = tu->filter.nr_systemwide || 1073 - (event->hw.tp_target->flags & PF_EXITING) || 1074 - uprobe_filter_event(tu, event); 1075 - } else { 1076 - tu->filter.nr_systemwide--; 1077 - done = tu->filter.nr_systemwide; 1036 + err = 0; 1037 + if (!done) { 1038 + err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); 1039 + if (err) 1040 + uprobe_perf_close(tu, event); 1078 1041 } 1079 - write_unlock(&tu->filter.rwlock); 1080 - 1081 - if (!done) 1082 - uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); 1083 - 1084 - return 0; 1042 + return err; 1085 1043 } 1086 1044 1087 1045 static bool 
uprobe_perf_filter(struct uprobe_consumer *uc,
+42 -1
tools/lib/api/fs/fs.c
··· 1 1 /* TODO merge/factor in debugfs.c here */ 2 2 3 + #include <ctype.h> 3 4 #include <errno.h> 4 5 #include <stdbool.h> 5 6 #include <stdio.h> 7 + #include <stdlib.h> 6 8 #include <string.h> 7 9 #include <sys/vfs.h> 8 10 ··· 98 96 return false; 99 97 } 100 98 99 + static void mem_toupper(char *f, size_t len) 100 + { 101 + while (len) { 102 + *f = toupper(*f); 103 + f++; 104 + len--; 105 + } 106 + } 107 + 108 + /* 109 + * Check for "NAME_PATH" environment variable to override fs location (for 110 + * testing). This matches the recommendation in Documentation/sysfs-rules.txt 111 + * for SYSFS_PATH. 112 + */ 113 + static bool fs__env_override(struct fs *fs) 114 + { 115 + char *override_path; 116 + size_t name_len = strlen(fs->name); 117 + /* name + "_PATH" + '\0' */ 118 + char upper_name[name_len + 5 + 1]; 119 + memcpy(upper_name, fs->name, name_len); 120 + mem_toupper(upper_name, name_len); 121 + strcpy(&upper_name[name_len], "_PATH"); 122 + 123 + override_path = getenv(upper_name); 124 + if (!override_path) 125 + return false; 126 + 127 + fs->found = true; 128 + strncpy(fs->path, override_path, sizeof(fs->path)); 129 + return true; 130 + } 131 + 101 132 static const char *fs__get_mountpoint(struct fs *fs) 102 133 { 134 + if (fs__env_override(fs)) 135 + return fs->path; 136 + 103 137 if (fs__check_mounts(fs)) 104 138 return fs->path; 105 139 106 - return fs__read_mounts(fs) ? fs->path : NULL; 140 + if (fs__read_mounts(fs)) 141 + return fs->path; 142 + 143 + return NULL; 107 144 } 108 145 109 146 static const char *fs__mountpoint(int idx)
+2 -1
tools/perf/Documentation/perf-record.txt
··· 184 184 - in_tx: only when the target is in a hardware transaction 185 185 - no_tx: only when the target is not in a hardware transaction 186 186 - abort_tx: only when the target is a hardware transaction abort 187 + - cond: conditional branches 187 188 188 189 + 189 - The option requires at least one branch type among any, any_call, any_ret, ind_call. 190 + The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. 190 191 The privilege levels may be omitted, in which case, the privilege levels of the associated 191 192 event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege 192 193 levels are subject to permissions. When sampling on multiple events, branch stack sampling
+6 -1
tools/perf/Documentation/perf-report.txt
··· 111 111 --fields=:: 112 112 Specify output field - multiple keys can be specified in CSV format. 113 113 Following fields are available: 114 - overhead, overhead_sys, overhead_us, sample and period. 114 + overhead, overhead_sys, overhead_us, overhead_children, sample and period. 115 115 Also it can contain any sort key(s). 116 116 117 117 By default, every sort keys not specified in -F will be appended ··· 162 162 - address: compare on individual code addresses 163 163 164 164 Default: fractal,0.5,callee,function. 165 + 166 + --children:: 167 + Accumulate callchain of children to parent entry so that then can 168 + show up in the output. The output will have a new "Children" column 169 + and will be sorted on the data. It requires callchains are recorded. 165 170 166 171 --max-stack:: 167 172 Set the stack depth limit when parsing the callchain, anything
+7 -1
tools/perf/Documentation/perf-top.txt
··· 119 119 --fields=:: 120 120 Specify output field - multiple keys can be specified in CSV format. 121 121 Following fields are available: 122 - overhead, overhead_sys, overhead_us, sample and period. 122 + overhead, overhead_sys, overhead_us, overhead_children, sample and period. 123 123 Also it can contain any sort key(s). 124 124 125 125 By default, every sort keys not specified in --field will be appended ··· 160 160 --call-graph:: 161 161 Setup and enable call-graph (stack chain/backtrace) recording, 162 162 implies -g. 163 + 164 + --children:: 165 + Accumulate callchain of children to parent entry so that then can 166 + show up in the output. The output will have a new "Children" column 167 + and will be sorted on the data. It requires -g/--call-graph option 168 + enabled. 163 169 164 170 --max-stack:: 165 171 Set the stack depth limit when parsing the callchain, anything
+9 -5
tools/perf/Makefile.perf
··· 400 400 LIB_OBJS += $(OUTPUT)tests/hists_link.o 401 401 LIB_OBJS += $(OUTPUT)tests/hists_filter.o 402 402 LIB_OBJS += $(OUTPUT)tests/hists_output.o 403 + LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o 403 404 LIB_OBJS += $(OUTPUT)tests/python-use.o 404 405 LIB_OBJS += $(OUTPUT)tests/bp_signal.o 405 406 LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o ··· 789 788 @echo '' 790 789 @echo 'Perf install targets:' 791 790 @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed' 792 - @echo ' HINT: use "make prefix=<path> <install target>" to install to a particular' 793 - @echo ' path like make prefix=/usr/local install install-doc' 791 + @echo ' HINT: use "prefix" or "DESTDIR" to install to a particular' 792 + @echo ' path like "make prefix=/usr/local install install-doc"' 794 793 @echo ' install - install compiled binaries' 795 794 @echo ' install-doc - install *all* documentation' 796 795 @echo ' install-man - install manpage documentation' ··· 815 814 $(DOC_TARGETS): 816 815 $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all) 817 816 817 + TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol 818 + TAG_FILES= ../../include/uapi/linux/perf_event.h 819 + 818 820 TAGS: 819 821 $(RM) TAGS 820 - $(FIND) . -name '*.[hcS]' -print | xargs etags -a 822 + $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES) 821 823 822 824 tags: 823 825 $(RM) tags 824 - $(FIND) . -name '*.[hcS]' -print | xargs ctags -a 826 + $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES) 825 827 826 828 cscope: 827 829 $(RM) cscope* 828 - $(FIND) . -name '*.[hcS]' -print | xargs cscope -b 830 + $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES) 829 831 830 832 ### Detect prefix changes 831 833 TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
+3 -2
tools/perf/builtin-annotate.c
··· 65 65 return 0; 66 66 } 67 67 68 - he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0); 68 + he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0, 69 + true); 69 70 if (he == NULL) 70 71 return -ENOMEM; 71 72 72 73 ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); 73 - hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); 74 + hists__inc_nr_samples(&evsel->hists, true); 74 75 return ret; 75 76 } 76 77
+1 -1
tools/perf/builtin-diff.c
··· 315 315 u64 weight, u64 transaction) 316 316 { 317 317 if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight, 318 - transaction) != NULL) 318 + transaction, true) != NULL) 319 319 return 0; 320 320 return -ENOMEM; 321 321 }
+6 -1
tools/perf/builtin-record.c
··· 454 454 if (done) 455 455 break; 456 456 err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1); 457 - if (err < 0 && errno == EINTR) 457 + /* 458 + * Propagate error, only if there's any. Ignore positive 459 + * number of returned events and interrupt error. 460 + */ 461 + if (err > 0 || (err < 0 && errno == EINTR)) 458 462 err = 0; 459 463 waking++; 460 464 } ··· 548 544 BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX), 549 545 BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX), 550 546 BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX), 547 + BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND), 551 548 BRANCH_END 552 549 }; 553 550
+64 -146
tools/perf/builtin-report.c
··· 72 72 rep->min_percent = strtof(value, NULL); 73 73 return 0; 74 74 } 75 + if (!strcmp(var, "report.children")) { 76 + symbol_conf.cumulate_callchain = perf_config_bool(var, value); 77 + return 0; 78 + } 75 79 76 80 return perf_default_config(var, value, cb); 77 81 } ··· 89 85 */ 90 86 if (he->stat.nr_events == 1) 91 87 rep->nr_entries++; 92 - 93 - /* 94 - * Only counts number of samples at this stage as it's more 95 - * natural to do it here and non-sample events are also 96 - * counted in perf_session_deliver_event(). The dump_trace 97 - * requires this info is ready before going to the output tree. 98 - */ 99 - hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE); 100 - if (!he->filtered) 101 - he->hists->stats.nr_non_filtered_samples++; 102 88 } 103 89 104 - static int report__add_mem_hist_entry(struct report *rep, struct addr_location *al, 105 - struct perf_sample *sample, struct perf_evsel *evsel) 90 + static int hist_iter__report_callback(struct hist_entry_iter *iter, 91 + struct addr_location *al, bool single, 92 + void *arg) 106 93 { 107 - struct symbol *parent = NULL; 108 - struct hist_entry *he; 109 - struct mem_info *mi, *mx; 110 - uint64_t cost; 111 - int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); 94 + int err = 0; 95 + struct report *rep = arg; 96 + struct hist_entry *he = iter->he; 97 + struct perf_evsel *evsel = iter->evsel; 98 + struct mem_info *mi; 99 + struct branch_info *bi; 112 100 113 - if (err) 114 - return err; 101 + report__inc_stats(rep, he); 115 102 116 - mi = sample__resolve_mem(sample, al); 117 - if (!mi) 118 - return -ENOMEM; 119 - 120 - if (rep->hide_unresolved && !al->sym) 103 + if (!ui__has_annotation()) 121 104 return 0; 122 105 123 - cost = sample->weight; 124 - if (!cost) 125 - cost = 1; 126 - 127 - /* 128 - * must pass period=weight in order to get the correct 129 - * sorting from hists__collapse_resort() which is solely 130 - * based on periods. 
We want sorting be done on nr_events * weight 131 - * and this is indirectly achieved by passing period=weight here 132 - * and the he_stat__add_period() function. 133 - */ 134 - he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi, 135 - cost, cost, 0); 136 - if (!he) 137 - return -ENOMEM; 138 - 139 - if (ui__has_annotation()) { 140 - err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); 106 + if (sort__mode == SORT_MODE__BRANCH) { 107 + bi = he->branch_info; 108 + err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); 141 109 if (err) 142 110 goto out; 143 111 144 - mx = he->mem_info; 145 - err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx); 112 + err = addr_map_symbol__inc_samples(&bi->to, evsel->idx); 113 + 114 + } else if (rep->mem_mode) { 115 + mi = he->mem_info; 116 + err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx); 146 117 if (err) 147 118 goto out; 148 - } 149 119 150 - report__inc_stats(rep, he); 151 - 152 - err = hist_entry__append_callchain(he, sample); 153 - out: 154 - return err; 155 - } 156 - 157 - static int report__add_branch_hist_entry(struct report *rep, struct addr_location *al, 158 - struct perf_sample *sample, struct perf_evsel *evsel) 159 - { 160 - struct symbol *parent = NULL; 161 - unsigned i; 162 - struct hist_entry *he; 163 - struct branch_info *bi, *bx; 164 - int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); 165 - 166 - if (err) 167 - return err; 168 - 169 - bi = sample__resolve_bstack(sample, al); 170 - if (!bi) 171 - return -ENOMEM; 172 - 173 - for (i = 0; i < sample->branch_stack->nr; i++) { 174 - if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) 175 - continue; 176 - 177 - err = -ENOMEM; 178 - 179 - /* overwrite the 'al' to branch-to info */ 180 - al->map = bi[i].to.map; 181 - al->sym = bi[i].to.sym; 182 - al->addr = bi[i].to.addr; 183 - /* 184 - * The report shows the percentage of total branches captured 185 - * and not events sampled. 
Thus we use a pseudo period of 1. 186 - */ 187 - he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL, 188 - 1, 1, 0); 189 - if (he) { 190 - if (ui__has_annotation()) { 191 - bx = he->branch_info; 192 - err = addr_map_symbol__inc_samples(&bx->from, 193 - evsel->idx); 194 - if (err) 195 - goto out; 196 - 197 - err = addr_map_symbol__inc_samples(&bx->to, 198 - evsel->idx); 199 - if (err) 200 - goto out; 201 - } 202 - report__inc_stats(rep, he); 203 - } else 204 - goto out; 205 - } 206 - err = 0; 207 - out: 208 - free(bi); 209 - return err; 210 - } 211 - 212 - static int report__add_hist_entry(struct report *rep, struct perf_evsel *evsel, 213 - struct addr_location *al, struct perf_sample *sample) 214 - { 215 - struct symbol *parent = NULL; 216 - struct hist_entry *he; 217 - int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack); 218 - 219 - if (err) 220 - return err; 221 - 222 - he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL, 223 - sample->period, sample->weight, 224 - sample->transaction); 225 - if (he == NULL) 226 - return -ENOMEM; 227 - 228 - err = hist_entry__append_callchain(he, sample); 229 - if (err) 230 - goto out; 231 - 232 - if (ui__has_annotation()) 233 120 err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); 234 121 235 - report__inc_stats(rep, he); 122 + } else if (symbol_conf.cumulate_callchain) { 123 + if (single) 124 + err = hist_entry__inc_addr_samples(he, evsel->idx, 125 + al->addr); 126 + } else { 127 + err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr); 128 + } 236 129 237 130 out: 238 131 return err; 239 132 } 240 - 241 133 242 134 static int process_sample_event(struct perf_tool *tool, 243 135 union perf_event *event, ··· 143 243 { 144 244 struct report *rep = container_of(tool, struct report, tool); 145 245 struct addr_location al; 246 + struct hist_entry_iter iter = { 247 + .hide_unresolved = rep->hide_unresolved, 248 + .add_entry_cb = hist_iter__report_callback, 249 + }; 
146 250 int ret; 147 251 148 252 if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { ··· 161 257 if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) 162 258 return 0; 163 259 164 - if (sort__mode == SORT_MODE__BRANCH) { 165 - ret = report__add_branch_hist_entry(rep, &al, sample, evsel); 166 - if (ret < 0) 167 - pr_debug("problem adding lbr entry, skipping event\n"); 168 - } else if (rep->mem_mode == 1) { 169 - ret = report__add_mem_hist_entry(rep, &al, sample, evsel); 170 - if (ret < 0) 171 - pr_debug("problem adding mem entry, skipping event\n"); 172 - } else { 173 - if (al.map != NULL) 174 - al.map->dso->hit = 1; 260 + if (sort__mode == SORT_MODE__BRANCH) 261 + iter.ops = &hist_iter_branch; 262 + else if (rep->mem_mode) 263 + iter.ops = &hist_iter_mem; 264 + else if (symbol_conf.cumulate_callchain) 265 + iter.ops = &hist_iter_cumulative; 266 + else 267 + iter.ops = &hist_iter_normal; 175 268 176 - ret = report__add_hist_entry(rep, evsel, &al, sample); 177 - if (ret < 0) 178 - pr_debug("problem incrementing symbol period, skipping event\n"); 179 - } 269 + if (al.map != NULL) 270 + al.map->dso->hit = 1; 271 + 272 + ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack, 273 + rep); 274 + if (ret < 0) 275 + pr_debug("problem adding hist entry, skipping event\n"); 276 + 180 277 return ret; 181 278 } 182 279 ··· 232 327 ui__error("Can't register callchain params.\n"); 233 328 return -EINVAL; 234 329 } 330 + } 331 + 332 + if (symbol_conf.cumulate_callchain) { 333 + /* Silently ignore if callchain is missing */ 334 + if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { 335 + symbol_conf.cumulate_callchain = false; 336 + perf_hpp__cancel_cumulate(); 337 + } 235 338 } 236 339 237 340 if (sort__mode == SORT_MODE__BRANCH) { ··· 625 712 OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order", 626 713 "Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, 
optional print limit, callchain order, key (function or address). " 627 714 "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt), 715 + OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain, 716 + "Accumulate callchains of children and show total overhead as well"), 628 717 OPT_INTEGER(0, "max-stack", &report.max_stack, 629 718 "Set the maximum stack depth when parsing the callchain, " 630 719 "anything beyond the specified depth will be ignored. " ··· 719 804 has_br_stack = perf_header__has_feat(&session->header, 720 805 HEADER_BRANCH_STACK); 721 806 722 - if (branch_mode == -1 && has_br_stack) 807 + if (branch_mode == -1 && has_br_stack) { 723 808 sort__mode = SORT_MODE__BRANCH; 809 + symbol_conf.cumulate_callchain = false; 810 + } 724 811 725 812 if (report.mem_mode) { 726 813 if (sort__mode == SORT_MODE__BRANCH) { ··· 730 813 goto error; 731 814 } 732 815 sort__mode = SORT_MODE__MEMORY; 816 + symbol_conf.cumulate_callchain = false; 733 817 } 734 818 735 819 if (setup_sorting() < 0) {
+1 -1
tools/perf/builtin-sched.c
··· 1428 1428 int err = 0; 1429 1429 1430 1430 evsel->hists.stats.total_period += sample->period; 1431 - hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); 1431 + hists__inc_nr_samples(&evsel->hists, true); 1432 1432 1433 1433 if (evsel->handler != NULL) { 1434 1434 tracepoint_handler f = evsel->handler;
+52 -38
tools/perf/builtin-top.c
··· 196 196 197 197 pthread_mutex_unlock(&notes->lock); 198 198 199 + /* 200 + * This function is now called with he->hists->lock held. 201 + * Release it before going to sleep. 202 + */ 203 + pthread_mutex_unlock(&he->hists->lock); 204 + 199 205 if (err == -ERANGE && !he->ms.map->erange_warned) 200 206 ui__warn_map_erange(he->ms.map, sym, ip); 201 207 else if (err == -ENOMEM) { ··· 209 203 sym->name); 210 204 sleep(1); 211 205 } 206 + 207 + pthread_mutex_lock(&he->hists->lock); 212 208 } 213 209 214 210 static void perf_top__show_details(struct perf_top *top) ··· 244 236 printf("%d lines not displayed, maybe increase display entries [e]\n", more); 245 237 out_unlock: 246 238 pthread_mutex_unlock(&notes->lock); 247 - } 248 - 249 - static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel, 250 - struct addr_location *al, 251 - struct perf_sample *sample) 252 - { 253 - struct hist_entry *he; 254 - 255 - pthread_mutex_lock(&evsel->hists.lock); 256 - he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 257 - sample->period, sample->weight, 258 - sample->transaction); 259 - pthread_mutex_unlock(&evsel->hists.lock); 260 - if (he == NULL) 261 - return NULL; 262 - 263 - hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); 264 - if (!he->filtered) 265 - evsel->hists.stats.nr_non_filtered_samples++; 266 - 267 - return he; 268 239 } 269 240 270 241 static void perf_top__print_sym_table(struct perf_top *top) ··· 649 662 return 0; 650 663 } 651 664 665 + static int hist_iter__top_callback(struct hist_entry_iter *iter, 666 + struct addr_location *al, bool single, 667 + void *arg) 668 + { 669 + struct perf_top *top = arg; 670 + struct hist_entry *he = iter->he; 671 + struct perf_evsel *evsel = iter->evsel; 672 + 673 + if (sort__has_sym && single) { 674 + u64 ip = al->addr; 675 + 676 + if (al->map) 677 + ip = al->map->unmap_ip(al->map, ip); 678 + 679 + perf_top__record_precise_ip(top, he, evsel->idx, ip); 680 + } 681 + 682 + return 0; 683 + } 684 + 
652 685 static void perf_event__process_sample(struct perf_tool *tool, 653 686 const union perf_event *event, 654 687 struct perf_evsel *evsel, ··· 676 669 struct machine *machine) 677 670 { 678 671 struct perf_top *top = container_of(tool, struct perf_top, tool); 679 - struct symbol *parent = NULL; 680 - u64 ip = sample->ip; 681 672 struct addr_location al; 682 673 int err; 683 674 ··· 750 745 } 751 746 752 747 if (al.sym == NULL || !al.sym->ignore) { 753 - struct hist_entry *he; 748 + struct hist_entry_iter iter = { 749 + .add_entry_cb = hist_iter__top_callback, 750 + }; 754 751 755 - err = sample__resolve_callchain(sample, &parent, evsel, &al, 756 - top->max_stack); 757 - if (err) 758 - return; 752 + if (symbol_conf.cumulate_callchain) 753 + iter.ops = &hist_iter_cumulative; 754 + else 755 + iter.ops = &hist_iter_normal; 759 756 760 - he = perf_evsel__add_hist_entry(evsel, &al, sample); 761 - if (he == NULL) { 757 + pthread_mutex_lock(&evsel->hists.lock); 758 + 759 + err = hist_entry_iter__add(&iter, &al, evsel, sample, 760 + top->max_stack, top); 761 + if (err < 0) 762 762 pr_err("Problem incrementing symbol period, skipping event\n"); 763 - return; 764 - } 765 763 766 - err = hist_entry__append_callchain(he, sample); 767 - if (err) 768 - return; 769 - 770 - if (sort__has_sym) 771 - perf_top__record_precise_ip(top, he, evsel->idx, ip); 764 + pthread_mutex_unlock(&evsel->hists.lock); 772 765 } 773 766 774 767 return; ··· 1004 1001 1005 1002 if (!strcmp(var, "top.call-graph")) 1006 1003 return record_parse_callchain(value, &top->record_opts); 1004 + if (!strcmp(var, "top.children")) { 1005 + symbol_conf.cumulate_callchain = perf_config_bool(var, value); 1006 + return 0; 1007 + } 1007 1008 1008 1009 return perf_default_config(var, value, cb); 1009 1010 } ··· 1102 1095 OPT_CALLBACK(0, "call-graph", &top.record_opts, 1103 1096 "mode[,dump_size]", record_callchain_help, 1104 1097 &parse_callchain_opt), 1098 + OPT_BOOLEAN(0, "children", 
&symbol_conf.cumulate_callchain, 1099 + "Accumulate callchains of children and show total overhead as well"), 1105 1100 OPT_INTEGER(0, "max-stack", &top.max_stack, 1106 1101 "Set the maximum stack depth when parsing the callchain. " 1107 1102 "Default: " __stringify(PERF_MAX_STACK_DEPTH)), ··· 1208 1199 } 1209 1200 1210 1201 top.sym_evsel = perf_evlist__first(top.evlist); 1202 + 1203 + if (!symbol_conf.use_callchain) { 1204 + symbol_conf.cumulate_callchain = false; 1205 + perf_hpp__cancel_cumulate(); 1206 + } 1211 1207 1212 1208 symbol_conf.priv_size = sizeof(struct annotation); 1213 1209
+2 -1
tools/perf/config/Makefile
··· 447 447 ifneq ($(feature-libperl), 1) 448 448 CFLAGS += -DNO_LIBPERL 449 449 NO_LIBPERL := 1 450 + msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed); 450 451 else 451 452 LDFLAGS += $(PERL_EMBED_LDFLAGS) 452 453 EXTLIBS += $(PERL_EMBED_LIBADD) ··· 600 599 601 600 # Make the path relative to DESTDIR, not to prefix 602 601 ifndef DESTDIR 603 - prefix = $(HOME) 602 + prefix ?= $(HOME) 604 603 endif 605 604 bindir_relative = bin 606 605 bindir = $(prefix)/$(bindir_relative)
+6 -2
tools/perf/perf.c
··· 481 481 fprintf(stderr, "cannot handle %s internally", cmd); 482 482 goto out; 483 483 } 484 - #ifdef HAVE_LIBAUDIT_SUPPORT 485 484 if (!prefixcmp(cmd, "trace")) { 485 + #ifdef HAVE_LIBAUDIT_SUPPORT 486 486 set_buildid_dir(); 487 487 setup_path(); 488 488 argv[0] = "trace"; 489 489 return cmd_trace(argc, argv, NULL); 490 - } 490 + #else 491 + fprintf(stderr, 492 + "trace command not available: missing audit-libs devel package at build time.\n"); 493 + goto out; 491 494 #endif 495 + } 492 496 /* Look for flags.. */ 493 497 argv++; 494 498 argc--;
+4
tools/perf/tests/builtin-test.c
··· 140 140 .func = test__hists_output, 141 141 }, 142 142 { 143 + .desc = "Test cumulation of child hist entries", 144 + .func = test__hists_cumulate, 145 + }, 146 + { 143 147 .func = NULL, 144 148 }, 145 149 };
+28 -24
tools/perf/tests/hists_common.c
··· 12 12 u32 pid; 13 13 const char *comm; 14 14 } fake_threads[] = { 15 - { 100, "perf" }, 16 - { 200, "perf" }, 17 - { 300, "bash" }, 15 + { FAKE_PID_PERF1, "perf" }, 16 + { FAKE_PID_PERF2, "perf" }, 17 + { FAKE_PID_BASH, "bash" }, 18 18 }; 19 19 20 20 static struct { ··· 22 22 u64 start; 23 23 const char *filename; 24 24 } fake_mmap_info[] = { 25 - { 100, 0x40000, "perf" }, 26 - { 100, 0x50000, "libc" }, 27 - { 100, 0xf0000, "[kernel]" }, 28 - { 200, 0x40000, "perf" }, 29 - { 200, 0x50000, "libc" }, 30 - { 200, 0xf0000, "[kernel]" }, 31 - { 300, 0x40000, "bash" }, 32 - { 300, 0x50000, "libc" }, 33 - { 300, 0xf0000, "[kernel]" }, 25 + { FAKE_PID_PERF1, FAKE_MAP_PERF, "perf" }, 26 + { FAKE_PID_PERF1, FAKE_MAP_LIBC, "libc" }, 27 + { FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" }, 28 + { FAKE_PID_PERF2, FAKE_MAP_PERF, "perf" }, 29 + { FAKE_PID_PERF2, FAKE_MAP_LIBC, "libc" }, 30 + { FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" }, 31 + { FAKE_PID_BASH, FAKE_MAP_BASH, "bash" }, 32 + { FAKE_PID_BASH, FAKE_MAP_LIBC, "libc" }, 33 + { FAKE_PID_BASH, FAKE_MAP_KERNEL, "[kernel]" }, 34 34 }; 35 35 36 36 struct fake_sym { ··· 40 40 }; 41 41 42 42 static struct fake_sym perf_syms[] = { 43 - { 700, 100, "main" }, 44 - { 800, 100, "run_command" }, 45 - { 900, 100, "cmd_record" }, 43 + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" }, 44 + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" }, 45 + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" }, 46 46 }; 47 47 48 48 static struct fake_sym bash_syms[] = { 49 - { 700, 100, "main" }, 50 - { 800, 100, "xmalloc" }, 51 - { 900, 100, "xfree" }, 49 + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" }, 50 + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" }, 51 + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" }, 52 52 }; 53 53 54 54 static struct fake_sym libc_syms[] = { 55 55 { 700, 100, "malloc" }, 56 56 { 800, 100, "free" }, 57 57 { 900, 100, "realloc" }, 58 + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" }, 59 + { FAKE_SYM_OFFSET2, 
FAKE_SYM_LENGTH, "free" }, 60 + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" }, 58 61 }; 59 62 60 63 static struct fake_sym kernel_syms[] = { 61 - { 700, 100, "schedule" }, 62 - { 800, 100, "page_fault" }, 63 - { 900, 100, "sys_perf_event_open" }, 64 + { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" }, 65 + { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" }, 66 + { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" }, 64 67 }; 65 68 66 69 static struct { ··· 105 102 .pid = fake_mmap_info[i].pid, 106 103 .tid = fake_mmap_info[i].pid, 107 104 .start = fake_mmap_info[i].start, 108 - .len = 0x1000ULL, 105 + .len = FAKE_MAP_LENGTH, 109 106 .pgoff = 0ULL, 110 107 }, 111 108 }; ··· 196 193 he = rb_entry(node, struct hist_entry, rb_node); 197 194 198 195 if (!he->filtered) { 199 - pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"\n", 196 + pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n", 200 197 i, thread__comm_str(he->thread), he->thread->tid, 201 198 he->ms.map->dso->short_name, 202 - he->ms.sym->name, he->stat.period); 199 + he->ms.sym->name, he->stat.period, 200 + he->stat_acc ? he->stat_acc->period : 0); 203 201 } 204 202 205 203 i++;
+30 -2
tools/perf/tests/hists_common.h
··· 4 4 struct machine; 5 5 struct machines; 6 6 7 + #define FAKE_PID_PERF1 100 8 + #define FAKE_PID_PERF2 200 9 + #define FAKE_PID_BASH 300 10 + 11 + #define FAKE_MAP_PERF 0x400000 12 + #define FAKE_MAP_BASH 0x400000 13 + #define FAKE_MAP_LIBC 0x500000 14 + #define FAKE_MAP_KERNEL 0xf00000 15 + #define FAKE_MAP_LENGTH 0x100000 16 + 17 + #define FAKE_SYM_OFFSET1 700 18 + #define FAKE_SYM_OFFSET2 800 19 + #define FAKE_SYM_OFFSET3 900 20 + #define FAKE_SYM_LENGTH 100 21 + 22 + #define FAKE_IP_PERF_MAIN FAKE_MAP_PERF + FAKE_SYM_OFFSET1 23 + #define FAKE_IP_PERF_RUN_COMMAND FAKE_MAP_PERF + FAKE_SYM_OFFSET2 24 + #define FAKE_IP_PERF_CMD_RECORD FAKE_MAP_PERF + FAKE_SYM_OFFSET3 25 + #define FAKE_IP_BASH_MAIN FAKE_MAP_BASH + FAKE_SYM_OFFSET1 26 + #define FAKE_IP_BASH_XMALLOC FAKE_MAP_BASH + FAKE_SYM_OFFSET2 27 + #define FAKE_IP_BASH_XFREE FAKE_MAP_BASH + FAKE_SYM_OFFSET3 28 + #define FAKE_IP_LIBC_MALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET1 29 + #define FAKE_IP_LIBC_FREE FAKE_MAP_LIBC + FAKE_SYM_OFFSET2 30 + #define FAKE_IP_LIBC_REALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET3 31 + #define FAKE_IP_KERNEL_SCHEDULE FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1 32 + #define FAKE_IP_KERNEL_PAGE_FAULT FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2 33 + #define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3 34 + 7 35 /* 8 36 * The setup_fake_machine() provides a test environment which consists 9 37 * of 3 processes that have 3 mappings and in turn, have 3 symbols ··· 41 13 * ............. ............. ................... 
42 14 * perf: 100 perf main 43 15 * perf: 100 perf run_command 44 - * perf: 100 perf comd_record 16 + * perf: 100 perf cmd_record 45 17 * perf: 100 libc malloc 46 18 * perf: 100 libc free 47 19 * perf: 100 libc realloc ··· 50 22 * perf: 100 [kernel] sys_perf_event_open 51 23 * perf: 200 perf main 52 24 * perf: 200 perf run_command 53 - * perf: 200 perf comd_record 25 + * perf: 200 perf cmd_record 54 26 * perf: 200 libc malloc 55 27 * perf: 200 libc free 56 28 * perf: 200 libc realloc
+726
tools/perf/tests/hists_cumulate.c
··· 1 + #include "perf.h" 2 + #include "util/debug.h" 3 + #include "util/symbol.h" 4 + #include "util/sort.h" 5 + #include "util/evsel.h" 6 + #include "util/evlist.h" 7 + #include "util/machine.h" 8 + #include "util/thread.h" 9 + #include "util/parse-events.h" 10 + #include "tests/tests.h" 11 + #include "tests/hists_common.h" 12 + 13 + struct sample { 14 + u32 pid; 15 + u64 ip; 16 + struct thread *thread; 17 + struct map *map; 18 + struct symbol *sym; 19 + }; 20 + 21 + /* For the numbers, see hists_common.c */ 22 + static struct sample fake_samples[] = { 23 + /* perf [kernel] schedule() */ 24 + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, 25 + /* perf [perf] main() */ 26 + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, 27 + /* perf [perf] cmd_record() */ 28 + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, }, 29 + /* perf [libc] malloc() */ 30 + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, 31 + /* perf [libc] free() */ 32 + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, }, 33 + /* perf [perf] main() */ 34 + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, 35 + /* perf [kernel] page_fault() */ 36 + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, 37 + /* bash [bash] main() */ 38 + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, 39 + /* bash [bash] xmalloc() */ 40 + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, 41 + /* bash [kernel] page_fault() */ 42 + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, 43 + }; 44 + 45 + /* 46 + * Will be casted to struct ip_callchain which has all 64 bit entries 47 + * of nr and ips[]. 
48 + */ 49 + static u64 fake_callchains[][10] = { 50 + /* schedule => run_command => main */ 51 + { 3, FAKE_IP_KERNEL_SCHEDULE, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, }, 52 + /* main */ 53 + { 1, FAKE_IP_PERF_MAIN, }, 54 + /* cmd_record => run_command => main */ 55 + { 3, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, }, 56 + /* malloc => cmd_record => run_command => main */ 57 + { 4, FAKE_IP_LIBC_MALLOC, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, 58 + FAKE_IP_PERF_MAIN, }, 59 + /* free => cmd_record => run_command => main */ 60 + { 4, FAKE_IP_LIBC_FREE, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, 61 + FAKE_IP_PERF_MAIN, }, 62 + /* main */ 63 + { 1, FAKE_IP_PERF_MAIN, }, 64 + /* page_fault => sys_perf_event_open => run_command => main */ 65 + { 4, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, 66 + FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, }, 67 + /* main */ 68 + { 1, FAKE_IP_BASH_MAIN, }, 69 + /* xmalloc => malloc => xmalloc => malloc => xmalloc => main */ 70 + { 6, FAKE_IP_BASH_XMALLOC, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, 71 + FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, FAKE_IP_BASH_MAIN, }, 72 + /* page_fault => malloc => main */ 73 + { 3, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_MAIN, }, 74 + }; 75 + 76 + static int add_hist_entries(struct hists *hists, struct machine *machine) 77 + { 78 + struct addr_location al; 79 + struct perf_evsel *evsel = hists_to_evsel(hists); 80 + struct perf_sample sample = { .period = 1000, }; 81 + size_t i; 82 + 83 + for (i = 0; i < ARRAY_SIZE(fake_samples); i++) { 84 + const union perf_event event = { 85 + .header = { 86 + .misc = PERF_RECORD_MISC_USER, 87 + }, 88 + }; 89 + struct hist_entry_iter iter = { 90 + .hide_unresolved = false, 91 + }; 92 + 93 + if (symbol_conf.cumulate_callchain) 94 + iter.ops = &hist_iter_cumulative; 95 + else 96 + iter.ops = &hist_iter_normal; 97 + 98 + sample.pid = fake_samples[i].pid; 99 + sample.tid = 
fake_samples[i].pid; 100 + sample.ip = fake_samples[i].ip; 101 + sample.callchain = (struct ip_callchain *)fake_callchains[i]; 102 + 103 + if (perf_event__preprocess_sample(&event, machine, &al, 104 + &sample) < 0) 105 + goto out; 106 + 107 + if (hist_entry_iter__add(&iter, &al, evsel, &sample, 108 + PERF_MAX_STACK_DEPTH, NULL) < 0) 109 + goto out; 110 + 111 + fake_samples[i].thread = al.thread; 112 + fake_samples[i].map = al.map; 113 + fake_samples[i].sym = al.sym; 114 + } 115 + 116 + return TEST_OK; 117 + 118 + out: 119 + pr_debug("Not enough memory for adding a hist entry\n"); 120 + return TEST_FAIL; 121 + } 122 + 123 + static void del_hist_entries(struct hists *hists) 124 + { 125 + struct hist_entry *he; 126 + struct rb_root *root_in; 127 + struct rb_root *root_out; 128 + struct rb_node *node; 129 + 130 + if (sort__need_collapse) 131 + root_in = &hists->entries_collapsed; 132 + else 133 + root_in = hists->entries_in; 134 + 135 + root_out = &hists->entries; 136 + 137 + while (!RB_EMPTY_ROOT(root_out)) { 138 + node = rb_first(root_out); 139 + 140 + he = rb_entry(node, struct hist_entry, rb_node); 141 + rb_erase(node, root_out); 142 + rb_erase(&he->rb_node_in, root_in); 143 + hist_entry__free(he); 144 + } 145 + } 146 + 147 + typedef int (*test_fn_t)(struct perf_evsel *, struct machine *); 148 + 149 + #define COMM(he) (thread__comm_str(he->thread)) 150 + #define DSO(he) (he->ms.map->dso->short_name) 151 + #define SYM(he) (he->ms.sym->name) 152 + #define CPU(he) (he->cpu) 153 + #define PID(he) (he->thread->tid) 154 + #define DEPTH(he) (he->callchain->max_depth) 155 + #define CDSO(cl) (cl->ms.map->dso->short_name) 156 + #define CSYM(cl) (cl->ms.sym->name) 157 + 158 + struct result { 159 + u64 children; 160 + u64 self; 161 + const char *comm; 162 + const char *dso; 163 + const char *sym; 164 + }; 165 + 166 + struct callchain_result { 167 + u64 nr; 168 + struct { 169 + const char *dso; 170 + const char *sym; 171 + } node[10]; 172 + }; 173 + 174 + static int 
do_test(struct hists *hists, struct result *expected, size_t nr_expected, 175 + struct callchain_result *expected_callchain, size_t nr_callchain) 176 + { 177 + char buf[32]; 178 + size_t i, c; 179 + struct hist_entry *he; 180 + struct rb_root *root; 181 + struct rb_node *node; 182 + struct callchain_node *cnode; 183 + struct callchain_list *clist; 184 + 185 + /* 186 + * adding and deleting hist entries must be done outside of this 187 + * function since TEST_ASSERT_VAL() returns in case of failure. 188 + */ 189 + hists__collapse_resort(hists, NULL); 190 + hists__output_resort(hists); 191 + 192 + if (verbose > 2) { 193 + pr_info("use callchain: %d, cumulate callchain: %d\n", 194 + symbol_conf.use_callchain, 195 + symbol_conf.cumulate_callchain); 196 + print_hists_out(hists); 197 + } 198 + 199 + root = &hists->entries; 200 + for (node = rb_first(root), i = 0; 201 + node && (he = rb_entry(node, struct hist_entry, rb_node)); 202 + node = rb_next(node), i++) { 203 + scnprintf(buf, sizeof(buf), "Invalid hist entry #%zd", i); 204 + 205 + TEST_ASSERT_VAL("Incorrect number of hist entry", 206 + i < nr_expected); 207 + TEST_ASSERT_VAL(buf, he->stat.period == expected[i].self && 208 + !strcmp(COMM(he), expected[i].comm) && 209 + !strcmp(DSO(he), expected[i].dso) && 210 + !strcmp(SYM(he), expected[i].sym)); 211 + 212 + if (symbol_conf.cumulate_callchain) 213 + TEST_ASSERT_VAL(buf, he->stat_acc->period == expected[i].children); 214 + 215 + if (!symbol_conf.use_callchain) 216 + continue; 217 + 218 + /* check callchain entries */ 219 + root = &he->callchain->node.rb_root; 220 + cnode = rb_entry(rb_first(root), struct callchain_node, rb_node); 221 + 222 + c = 0; 223 + list_for_each_entry(clist, &cnode->val, list) { 224 + scnprintf(buf, sizeof(buf), "Invalid callchain entry #%zd/%zd", i, c); 225 + 226 + TEST_ASSERT_VAL("Incorrect number of callchain entry", 227 + c < expected_callchain[i].nr); 228 + TEST_ASSERT_VAL(buf, 229 + !strcmp(CDSO(clist), expected_callchain[i].node[c].dso) 
&& 230 + !strcmp(CSYM(clist), expected_callchain[i].node[c].sym)); 231 + c++; 232 + } 233 + /* TODO: handle multiple child nodes properly */ 234 + TEST_ASSERT_VAL("Incorrect number of callchain entry", 235 + c <= expected_callchain[i].nr); 236 + } 237 + TEST_ASSERT_VAL("Incorrect number of hist entry", 238 + i == nr_expected); 239 + TEST_ASSERT_VAL("Incorrect number of callchain entry", 240 + !symbol_conf.use_callchain || nr_expected == nr_callchain); 241 + return 0; 242 + } 243 + 244 + /* NO callchain + NO children */ 245 + static int test1(struct perf_evsel *evsel, struct machine *machine) 246 + { 247 + int err; 248 + struct hists *hists = &evsel->hists; 249 + /* 250 + * expected output: 251 + * 252 + * Overhead Command Shared Object Symbol 253 + * ======== ======= ============= ============== 254 + * 20.00% perf perf [.] main 255 + * 10.00% bash [kernel] [k] page_fault 256 + * 10.00% bash bash [.] main 257 + * 10.00% bash bash [.] xmalloc 258 + * 10.00% perf [kernel] [k] page_fault 259 + * 10.00% perf [kernel] [k] schedule 260 + * 10.00% perf libc [.] free 261 + * 10.00% perf libc [.] malloc 262 + * 10.00% perf perf [.] 
cmd_record 263 + */ 264 + struct result expected[] = { 265 + { 0, 2000, "perf", "perf", "main" }, 266 + { 0, 1000, "bash", "[kernel]", "page_fault" }, 267 + { 0, 1000, "bash", "bash", "main" }, 268 + { 0, 1000, "bash", "bash", "xmalloc" }, 269 + { 0, 1000, "perf", "[kernel]", "page_fault" }, 270 + { 0, 1000, "perf", "[kernel]", "schedule" }, 271 + { 0, 1000, "perf", "libc", "free" }, 272 + { 0, 1000, "perf", "libc", "malloc" }, 273 + { 0, 1000, "perf", "perf", "cmd_record" }, 274 + }; 275 + 276 + symbol_conf.use_callchain = false; 277 + symbol_conf.cumulate_callchain = false; 278 + 279 + setup_sorting(); 280 + callchain_register_param(&callchain_param); 281 + 282 + err = add_hist_entries(hists, machine); 283 + if (err < 0) 284 + goto out; 285 + 286 + err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0); 287 + 288 + out: 289 + del_hist_entries(hists); 290 + reset_output_field(); 291 + return err; 292 + } 293 + 294 + /* callcain + NO children */ 295 + static int test2(struct perf_evsel *evsel, struct machine *machine) 296 + { 297 + int err; 298 + struct hists *hists = &evsel->hists; 299 + /* 300 + * expected output: 301 + * 302 + * Overhead Command Shared Object Symbol 303 + * ======== ======= ============= ============== 304 + * 20.00% perf perf [.] main 305 + * | 306 + * --- main 307 + * 308 + * 10.00% bash [kernel] [k] page_fault 309 + * | 310 + * --- page_fault 311 + * malloc 312 + * main 313 + * 314 + * 10.00% bash bash [.] main 315 + * | 316 + * --- main 317 + * 318 + * 10.00% bash bash [.] xmalloc 319 + * | 320 + * --- xmalloc 321 + * malloc 322 + * xmalloc <--- NOTE: there's a cycle 323 + * malloc 324 + * xmalloc 325 + * main 326 + * 327 + * 10.00% perf [kernel] [k] page_fault 328 + * | 329 + * --- page_fault 330 + * sys_perf_event_open 331 + * run_command 332 + * main 333 + * 334 + * 10.00% perf [kernel] [k] schedule 335 + * | 336 + * --- schedule 337 + * run_command 338 + * main 339 + * 340 + * 10.00% perf libc [.] 
free 341 + * | 342 + * --- free 343 + * cmd_record 344 + * run_command 345 + * main 346 + * 347 + * 10.00% perf libc [.] malloc 348 + * | 349 + * --- malloc 350 + * cmd_record 351 + * run_command 352 + * main 353 + * 354 + * 10.00% perf perf [.] cmd_record 355 + * | 356 + * --- cmd_record 357 + * run_command 358 + * main 359 + * 360 + */ 361 + struct result expected[] = { 362 + { 0, 2000, "perf", "perf", "main" }, 363 + { 0, 1000, "bash", "[kernel]", "page_fault" }, 364 + { 0, 1000, "bash", "bash", "main" }, 365 + { 0, 1000, "bash", "bash", "xmalloc" }, 366 + { 0, 1000, "perf", "[kernel]", "page_fault" }, 367 + { 0, 1000, "perf", "[kernel]", "schedule" }, 368 + { 0, 1000, "perf", "libc", "free" }, 369 + { 0, 1000, "perf", "libc", "malloc" }, 370 + { 0, 1000, "perf", "perf", "cmd_record" }, 371 + }; 372 + struct callchain_result expected_callchain[] = { 373 + { 374 + 1, { { "perf", "main" }, }, 375 + }, 376 + { 377 + 3, { { "[kernel]", "page_fault" }, 378 + { "libc", "malloc" }, 379 + { "bash", "main" }, }, 380 + }, 381 + { 382 + 1, { { "bash", "main" }, }, 383 + }, 384 + { 385 + 6, { { "bash", "xmalloc" }, 386 + { "libc", "malloc" }, 387 + { "bash", "xmalloc" }, 388 + { "libc", "malloc" }, 389 + { "bash", "xmalloc" }, 390 + { "bash", "main" }, }, 391 + }, 392 + { 393 + 4, { { "[kernel]", "page_fault" }, 394 + { "[kernel]", "sys_perf_event_open" }, 395 + { "perf", "run_command" }, 396 + { "perf", "main" }, }, 397 + }, 398 + { 399 + 3, { { "[kernel]", "schedule" }, 400 + { "perf", "run_command" }, 401 + { "perf", "main" }, }, 402 + }, 403 + { 404 + 4, { { "libc", "free" }, 405 + { "perf", "cmd_record" }, 406 + { "perf", "run_command" }, 407 + { "perf", "main" }, }, 408 + }, 409 + { 410 + 4, { { "libc", "malloc" }, 411 + { "perf", "cmd_record" }, 412 + { "perf", "run_command" }, 413 + { "perf", "main" }, }, 414 + }, 415 + { 416 + 3, { { "perf", "cmd_record" }, 417 + { "perf", "run_command" }, 418 + { "perf", "main" }, }, 419 + }, 420 + }; 421 + 422 + 
symbol_conf.use_callchain = true; 423 + symbol_conf.cumulate_callchain = false; 424 + 425 + setup_sorting(); 426 + callchain_register_param(&callchain_param); 427 + 428 + err = add_hist_entries(hists, machine); 429 + if (err < 0) 430 + goto out; 431 + 432 + err = do_test(hists, expected, ARRAY_SIZE(expected), 433 + expected_callchain, ARRAY_SIZE(expected_callchain)); 434 + 435 + out: 436 + del_hist_entries(hists); 437 + reset_output_field(); 438 + return err; 439 + } 440 + 441 + /* NO callchain + children */ 442 + static int test3(struct perf_evsel *evsel, struct machine *machine) 443 + { 444 + int err; 445 + struct hists *hists = &evsel->hists; 446 + /* 447 + * expected output: 448 + * 449 + * Children Self Command Shared Object Symbol 450 + * ======== ======== ======= ============= ======================= 451 + * 70.00% 20.00% perf perf [.] main 452 + * 50.00% 0.00% perf perf [.] run_command 453 + * 30.00% 10.00% bash bash [.] main 454 + * 30.00% 10.00% perf perf [.] cmd_record 455 + * 20.00% 0.00% bash libc [.] malloc 456 + * 10.00% 10.00% bash [kernel] [k] page_fault 457 + * 10.00% 10.00% perf [kernel] [k] schedule 458 + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open 459 + * 10.00% 10.00% perf [kernel] [k] page_fault 460 + * 10.00% 10.00% perf libc [.] free 461 + * 10.00% 10.00% perf libc [.] malloc 462 + * 10.00% 10.00% bash bash [.] 
xmalloc 463 + */ 464 + struct result expected[] = { 465 + { 7000, 2000, "perf", "perf", "main" }, 466 + { 5000, 0, "perf", "perf", "run_command" }, 467 + { 3000, 1000, "bash", "bash", "main" }, 468 + { 3000, 1000, "perf", "perf", "cmd_record" }, 469 + { 2000, 0, "bash", "libc", "malloc" }, 470 + { 1000, 1000, "bash", "[kernel]", "page_fault" }, 471 + { 1000, 1000, "perf", "[kernel]", "schedule" }, 472 + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, 473 + { 1000, 1000, "perf", "[kernel]", "page_fault" }, 474 + { 1000, 1000, "perf", "libc", "free" }, 475 + { 1000, 1000, "perf", "libc", "malloc" }, 476 + { 1000, 1000, "bash", "bash", "xmalloc" }, 477 + }; 478 + 479 + symbol_conf.use_callchain = false; 480 + symbol_conf.cumulate_callchain = true; 481 + 482 + setup_sorting(); 483 + callchain_register_param(&callchain_param); 484 + 485 + err = add_hist_entries(hists, machine); 486 + if (err < 0) 487 + goto out; 488 + 489 + err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0); 490 + 491 + out: 492 + del_hist_entries(hists); 493 + reset_output_field(); 494 + return err; 495 + } 496 + 497 + /* callchain + children */ 498 + static int test4(struct perf_evsel *evsel, struct machine *machine) 499 + { 500 + int err; 501 + struct hists *hists = &evsel->hists; 502 + /* 503 + * expected output: 504 + * 505 + * Children Self Command Shared Object Symbol 506 + * ======== ======== ======= ============= ======================= 507 + * 70.00% 20.00% perf perf [.] main 508 + * | 509 + * --- main 510 + * 511 + * 50.00% 0.00% perf perf [.] run_command 512 + * | 513 + * --- run_command 514 + * main 515 + * 516 + * 30.00% 10.00% bash bash [.] main 517 + * | 518 + * --- main 519 + * 520 + * 30.00% 10.00% perf perf [.] cmd_record 521 + * | 522 + * --- cmd_record 523 + * run_command 524 + * main 525 + * 526 + * 20.00% 0.00% bash libc [.] 
malloc 527 + * | 528 + * --- malloc 529 + * | 530 + * |--50.00%-- xmalloc 531 + * | main 532 + * --50.00%-- main 533 + * 534 + * 10.00% 10.00% bash [kernel] [k] page_fault 535 + * | 536 + * --- page_fault 537 + * malloc 538 + * main 539 + * 540 + * 10.00% 10.00% perf [kernel] [k] schedule 541 + * | 542 + * --- schedule 543 + * run_command 544 + * main 545 + * 546 + * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open 547 + * | 548 + * --- sys_perf_event_open 549 + * run_command 550 + * main 551 + * 552 + * 10.00% 10.00% perf [kernel] [k] page_fault 553 + * | 554 + * --- page_fault 555 + * sys_perf_event_open 556 + * run_command 557 + * main 558 + * 559 + * 10.00% 10.00% perf libc [.] free 560 + * | 561 + * --- free 562 + * cmd_record 563 + * run_command 564 + * main 565 + * 566 + * 10.00% 10.00% perf libc [.] malloc 567 + * | 568 + * --- malloc 569 + * cmd_record 570 + * run_command 571 + * main 572 + * 573 + * 10.00% 10.00% bash bash [.] xmalloc 574 + * | 575 + * --- xmalloc 576 + * malloc 577 + * xmalloc <--- NOTE: there's a cycle 578 + * malloc 579 + * xmalloc 580 + * main 581 + * 582 + */ 583 + struct result expected[] = { 584 + { 7000, 2000, "perf", "perf", "main" }, 585 + { 5000, 0, "perf", "perf", "run_command" }, 586 + { 3000, 1000, "bash", "bash", "main" }, 587 + { 3000, 1000, "perf", "perf", "cmd_record" }, 588 + { 2000, 0, "bash", "libc", "malloc" }, 589 + { 1000, 1000, "bash", "[kernel]", "page_fault" }, 590 + { 1000, 1000, "perf", "[kernel]", "schedule" }, 591 + { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" }, 592 + { 1000, 1000, "perf", "[kernel]", "page_fault" }, 593 + { 1000, 1000, "perf", "libc", "free" }, 594 + { 1000, 1000, "perf", "libc", "malloc" }, 595 + { 1000, 1000, "bash", "bash", "xmalloc" }, 596 + }; 597 + struct callchain_result expected_callchain[] = { 598 + { 599 + 1, { { "perf", "main" }, }, 600 + }, 601 + { 602 + 2, { { "perf", "run_command" }, 603 + { "perf", "main" }, }, 604 + }, 605 + { 606 + 1, { { "bash", "main" }, }, 607 
+ }, 608 + { 609 + 3, { { "perf", "cmd_record" }, 610 + { "perf", "run_command" }, 611 + { "perf", "main" }, }, 612 + }, 613 + { 614 + 4, { { "libc", "malloc" }, 615 + { "bash", "xmalloc" }, 616 + { "bash", "main" }, 617 + { "bash", "main" }, }, 618 + }, 619 + { 620 + 3, { { "[kernel]", "page_fault" }, 621 + { "libc", "malloc" }, 622 + { "bash", "main" }, }, 623 + }, 624 + { 625 + 3, { { "[kernel]", "schedule" }, 626 + { "perf", "run_command" }, 627 + { "perf", "main" }, }, 628 + }, 629 + { 630 + 3, { { "[kernel]", "sys_perf_event_open" }, 631 + { "perf", "run_command" }, 632 + { "perf", "main" }, }, 633 + }, 634 + { 635 + 4, { { "[kernel]", "page_fault" }, 636 + { "[kernel]", "sys_perf_event_open" }, 637 + { "perf", "run_command" }, 638 + { "perf", "main" }, }, 639 + }, 640 + { 641 + 4, { { "libc", "free" }, 642 + { "perf", "cmd_record" }, 643 + { "perf", "run_command" }, 644 + { "perf", "main" }, }, 645 + }, 646 + { 647 + 4, { { "libc", "malloc" }, 648 + { "perf", "cmd_record" }, 649 + { "perf", "run_command" }, 650 + { "perf", "main" }, }, 651 + }, 652 + { 653 + 6, { { "bash", "xmalloc" }, 654 + { "libc", "malloc" }, 655 + { "bash", "xmalloc" }, 656 + { "libc", "malloc" }, 657 + { "bash", "xmalloc" }, 658 + { "bash", "main" }, }, 659 + }, 660 + }; 661 + 662 + symbol_conf.use_callchain = true; 663 + symbol_conf.cumulate_callchain = true; 664 + 665 + setup_sorting(); 666 + callchain_register_param(&callchain_param); 667 + 668 + err = add_hist_entries(hists, machine); 669 + if (err < 0) 670 + goto out; 671 + 672 + err = do_test(hists, expected, ARRAY_SIZE(expected), 673 + expected_callchain, ARRAY_SIZE(expected_callchain)); 674 + 675 + out: 676 + del_hist_entries(hists); 677 + reset_output_field(); 678 + return err; 679 + } 680 + 681 + int test__hists_cumulate(void) 682 + { 683 + int err = TEST_FAIL; 684 + struct machines machines; 685 + struct machine *machine; 686 + struct perf_evsel *evsel; 687 + struct perf_evlist *evlist = perf_evlist__new(); 688 + size_t i; 
689 + test_fn_t testcases[] = { 690 + test1, 691 + test2, 692 + test3, 693 + test4, 694 + }; 695 + 696 + TEST_ASSERT_VAL("No memory", evlist); 697 + 698 + err = parse_events(evlist, "cpu-clock"); 699 + if (err) 700 + goto out; 701 + 702 + machines__init(&machines); 703 + 704 + /* setup threads/dso/map/symbols also */ 705 + machine = setup_fake_machine(&machines); 706 + if (!machine) 707 + goto out; 708 + 709 + if (verbose > 1) 710 + machine__fprintf(machine, stderr); 711 + 712 + evsel = perf_evlist__first(evlist); 713 + 714 + for (i = 0; i < ARRAY_SIZE(testcases); i++) { 715 + err = testcases[i](evsel, machine); 716 + if (err < 0) 717 + break; 718 + } 719 + 720 + out: 721 + /* tear down everything */ 722 + perf_evlist__delete(evlist); 723 + machines__exit(&machines); 724 + 725 + return err; 726 + }
+19 -20
tools/perf/tests/hists_filter.c
··· 21 21 /* For the numbers, see hists_common.c */ 22 22 static struct sample fake_samples[] = { 23 23 /* perf [kernel] schedule() */ 24 - { .pid = 100, .ip = 0xf0000 + 700, }, 24 + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, 25 25 /* perf [perf] main() */ 26 - { .pid = 100, .ip = 0x40000 + 700, }, 26 + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, 27 27 /* perf [libc] malloc() */ 28 - { .pid = 100, .ip = 0x50000 + 700, }, 28 + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, 29 29 /* perf [perf] main() */ 30 - { .pid = 200, .ip = 0x40000 + 700, }, /* will be merged */ 30 + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */ 31 31 /* perf [perf] cmd_record() */ 32 - { .pid = 200, .ip = 0x40000 + 900, }, 32 + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, }, 33 33 /* perf [kernel] page_fault() */ 34 - { .pid = 200, .ip = 0xf0000 + 800, }, 34 + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, 35 35 /* bash [bash] main() */ 36 - { .pid = 300, .ip = 0x40000 + 700, }, 36 + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, 37 37 /* bash [bash] xmalloc() */ 38 - { .pid = 300, .ip = 0x40000 + 800, }, 38 + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, 39 39 /* bash [libc] malloc() */ 40 - { .pid = 300, .ip = 0x50000 + 700, }, 40 + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, 41 41 /* bash [kernel] page_fault() */ 42 - { .pid = 300, .ip = 0xf0000 + 800, }, 42 + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, 43 43 }; 44 44 45 - static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) 45 + static int add_hist_entries(struct perf_evlist *evlist, 46 + struct machine *machine __maybe_unused) 46 47 { 47 48 struct perf_evsel *evsel; 48 49 struct addr_location al; 49 - struct hist_entry *he; 50 - struct perf_sample sample = { .cpu = 0, }; 50 + struct perf_sample sample = { .period = 100, }; 51 51 size_t i; 52 52 53 53 /* ··· 61 61 .header = { 62 62 .misc = 
PERF_RECORD_MISC_USER, 63 63 }, 64 + }; 65 + struct hist_entry_iter iter = { 66 + .ops = &hist_iter_normal, 67 + .hide_unresolved = false, 64 68 }; 65 69 66 70 /* make sure it has no filter at first */ ··· 80 76 &sample) < 0) 81 77 goto out; 82 78 83 - he = __hists__add_entry(&evsel->hists, &al, NULL, 84 - NULL, NULL, 100, 1, 0); 85 - if (he == NULL) 79 + if (hist_entry_iter__add(&iter, &al, evsel, &sample, 80 + PERF_MAX_STACK_DEPTH, NULL) < 0) 86 81 goto out; 87 82 88 83 fake_samples[i].thread = al.thread; 89 84 fake_samples[i].map = al.map; 90 85 fake_samples[i].sym = al.sym; 91 - 92 - hists__inc_nr_events(he->hists, PERF_RECORD_SAMPLE); 93 - if (!he->filtered) 94 - he->hists->stats.nr_non_filtered_samples++; 95 86 } 96 87 } 97 88
+18 -18
tools/perf/tests/hists_link.c
··· 21 21 /* For the numbers, see hists_common.c */ 22 22 static struct sample fake_common_samples[] = { 23 23 /* perf [kernel] schedule() */ 24 - { .pid = 100, .ip = 0xf0000 + 700, }, 24 + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, 25 25 /* perf [perf] main() */ 26 - { .pid = 200, .ip = 0x40000 + 700, }, 26 + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, 27 27 /* perf [perf] cmd_record() */ 28 - { .pid = 200, .ip = 0x40000 + 900, }, 28 + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, }, 29 29 /* bash [bash] xmalloc() */ 30 - { .pid = 300, .ip = 0x40000 + 800, }, 30 + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, 31 31 /* bash [libc] malloc() */ 32 - { .pid = 300, .ip = 0x50000 + 700, }, 32 + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, 33 33 }; 34 34 35 35 static struct sample fake_samples[][5] = { 36 36 { 37 37 /* perf [perf] run_command() */ 38 - { .pid = 100, .ip = 0x40000 + 800, }, 38 + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, }, 39 39 /* perf [libc] malloc() */ 40 - { .pid = 100, .ip = 0x50000 + 700, }, 40 + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, 41 41 /* perf [kernel] page_fault() */ 42 - { .pid = 100, .ip = 0xf0000 + 800, }, 42 + { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, 43 43 /* perf [kernel] sys_perf_event_open() */ 44 - { .pid = 200, .ip = 0xf0000 + 900, }, 44 + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, }, 45 45 /* bash [libc] free() */ 46 - { .pid = 300, .ip = 0x50000 + 800, }, 46 + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_FREE, }, 47 47 }, 48 48 { 49 49 /* perf [libc] free() */ 50 - { .pid = 200, .ip = 0x50000 + 800, }, 50 + { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, }, 51 51 /* bash [libc] malloc() */ 52 - { .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */ 52 + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */ 53 53 /* bash [bash] xfee() */ 54 - { .pid = 300, .ip = 0x40000 + 900, }, 54 
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XFREE, }, 55 55 /* bash [libc] realloc() */ 56 - { .pid = 300, .ip = 0x50000 + 900, }, 56 + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_REALLOC, }, 57 57 /* bash [kernel] page_fault() */ 58 - { .pid = 300, .ip = 0xf0000 + 800, }, 58 + { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, 59 59 }, 60 60 }; 61 61 ··· 64 64 struct perf_evsel *evsel; 65 65 struct addr_location al; 66 66 struct hist_entry *he; 67 - struct perf_sample sample = { .cpu = 0, }; 67 + struct perf_sample sample = { .period = 1, }; 68 68 size_t i = 0, k; 69 69 70 70 /* ··· 88 88 goto out; 89 89 90 90 he = __hists__add_entry(&evsel->hists, &al, NULL, 91 - NULL, NULL, 1, 1, 0); 91 + NULL, NULL, 1, 1, 0, true); 92 92 if (he == NULL) 93 93 goto out; 94 94 ··· 112 112 goto out; 113 113 114 114 he = __hists__add_entry(&evsel->hists, &al, NULL, 115 - NULL, NULL, 1, 1, 0); 115 + NULL, NULL, 1, 1, 0, true); 116 116 if (he == NULL) 117 117 goto out; 118 118
+17 -14
tools/perf/tests/hists_output.c
··· 22 22 /* For the numbers, see hists_common.c */ 23 23 static struct sample fake_samples[] = { 24 24 /* perf [kernel] schedule() */ 25 - { .cpu = 0, .pid = 100, .ip = 0xf0000 + 700, }, 25 + { .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, }, 26 26 /* perf [perf] main() */ 27 - { .cpu = 1, .pid = 100, .ip = 0x40000 + 700, }, 27 + { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, }, 28 28 /* perf [perf] cmd_record() */ 29 - { .cpu = 1, .pid = 100, .ip = 0x40000 + 900, }, 29 + { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, }, 30 30 /* perf [libc] malloc() */ 31 - { .cpu = 1, .pid = 100, .ip = 0x50000 + 700, }, 31 + { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, }, 32 32 /* perf [libc] free() */ 33 - { .cpu = 2, .pid = 100, .ip = 0x50000 + 800, }, 33 + { .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, }, 34 34 /* perf [perf] main() */ 35 - { .cpu = 2, .pid = 200, .ip = 0x40000 + 700, }, 35 + { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, 36 36 /* perf [kernel] page_fault() */ 37 - { .cpu = 2, .pid = 200, .ip = 0xf0000 + 800, }, 37 + { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, 38 38 /* bash [bash] main() */ 39 - { .cpu = 3, .pid = 300, .ip = 0x40000 + 700, }, 39 + { .cpu = 3, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, }, 40 40 /* bash [bash] xmalloc() */ 41 - { .cpu = 0, .pid = 300, .ip = 0x40000 + 800, }, 41 + { .cpu = 0, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, }, 42 42 /* bash [kernel] page_fault() */ 43 - { .cpu = 1, .pid = 300, .ip = 0xf0000 + 800, }, 43 + { .cpu = 1, .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, }, 44 44 }; 45 45 46 46 static int add_hist_entries(struct hists *hists, struct machine *machine) 47 47 { 48 48 struct addr_location al; 49 - struct hist_entry *he; 49 + struct perf_evsel *evsel = hists_to_evsel(hists); 50 50 struct perf_sample sample = { .period = 100, }; 51 51 size_t i; 52 52 ··· 55 55 .header = { 56 
56 .misc = PERF_RECORD_MISC_USER, 57 57 }, 58 + }; 59 + struct hist_entry_iter iter = { 60 + .ops = &hist_iter_normal, 61 + .hide_unresolved = false, 58 62 }; 59 63 60 64 sample.cpu = fake_samples[i].cpu; ··· 70 66 &sample) < 0) 71 67 goto out; 72 68 73 - he = __hists__add_entry(hists, &al, NULL, NULL, NULL, 74 - sample.period, 1, 0); 75 - if (he == NULL) 69 + if (hist_entry_iter__add(&iter, &al, evsel, &sample, 70 + PERF_MAX_STACK_DEPTH, NULL) < 0) 76 71 goto out; 77 72 78 73 fake_samples[i].thread = al.thread;
+1
tools/perf/tests/tests.h
··· 45 45 int test__mmap_thread_lookup(void); 46 46 int test__thread_mg_share(void); 47 47 int test__hists_output(void); 48 + int test__hists_cumulate(void); 48 49 49 50 #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) 50 51 #ifdef HAVE_DWARF_UNWIND_SUPPORT
+1 -1
tools/perf/ui/browser.c
··· 194 194 ui_helpline__vpush(format, args); 195 195 va_end(args); 196 196 } else { 197 - while ((key == ui__question_window("Warning!", text, 197 + while ((key = ui__question_window("Warning!", text, 198 198 "Press any key...", 199 199 timeout)) == K_RESIZE) 200 200 ui_browser__handle_resize(browser);
+40 -33
tools/perf/ui/browsers/hists.c
··· 37 37 static void hist_browser__update_nr_entries(struct hist_browser *hb); 38 38 39 39 static struct rb_node *hists__filter_entries(struct rb_node *nd, 40 - struct hists *hists, 41 40 float min_pcnt); 42 41 43 42 static bool hist_browser__has_filter(struct hist_browser *hb) ··· 318 319 struct hists *hists = browser->hists; 319 320 320 321 for (nd = rb_first(&hists->entries); 321 - (nd = hists__filter_entries(nd, hists, browser->min_pcnt)) != NULL; 322 + (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; 322 323 nd = rb_next(nd)) { 323 324 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); 324 325 hist_entry__set_folding(he, unfold); ··· 650 651 __hpp__slsmg_color_printf, true); \ 651 652 } 652 653 654 + #define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ 655 + static u64 __hpp_get_acc_##_field(struct hist_entry *he) \ 656 + { \ 657 + return he->stat_acc->_field; \ 658 + } \ 659 + \ 660 + static int \ 661 + hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused,\ 662 + struct perf_hpp *hpp, \ 663 + struct hist_entry *he) \ 664 + { \ 665 + if (!symbol_conf.cumulate_callchain) { \ 666 + int ret = scnprintf(hpp->buf, hpp->size, "%8s", "N/A"); \ 667 + slsmg_printf("%s", hpp->buf); \ 668 + \ 669 + return ret; \ 670 + } \ 671 + return __hpp__fmt(hpp, he, __hpp_get_acc_##_field, " %6.2f%%", \ 672 + __hpp__slsmg_color_printf, true); \ 673 + } 674 + 653 675 __HPP_COLOR_PERCENT_FN(overhead, period) 654 676 __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys) 655 677 __HPP_COLOR_PERCENT_FN(overhead_us, period_us) 656 678 __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys) 657 679 __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) 680 + __HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period) 658 681 659 682 #undef __HPP_COLOR_PERCENT_FN 683 + #undef __HPP_COLOR_ACC_PERCENT_FN 660 684 661 685 void hist_browser__init_hpp(void) 662 686 { ··· 693 671 hist_browser__hpp_color_overhead_guest_sys; 694 672 
perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = 695 673 hist_browser__hpp_color_overhead_guest_us; 674 + perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color = 675 + hist_browser__hpp_color_overhead_acc; 696 676 } 697 677 698 678 static int hist_browser__show_entry(struct hist_browser *browser, ··· 807 783 808 784 for (nd = browser->top; nd; nd = rb_next(nd)) { 809 785 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 810 - u64 total = hists__total_period(h->hists); 811 - float percent = 0.0; 786 + float percent; 812 787 813 788 if (h->filtered) 814 789 continue; 815 790 816 - if (total) 817 - percent = h->stat.period * 100.0 / total; 818 - 791 + percent = hist_entry__get_percent_limit(h); 819 792 if (percent < hb->min_pcnt) 820 793 continue; 821 794 ··· 825 804 } 826 805 827 806 static struct rb_node *hists__filter_entries(struct rb_node *nd, 828 - struct hists *hists, 829 807 float min_pcnt) 830 808 { 831 809 while (nd != NULL) { 832 810 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 833 - u64 total = hists__total_period(hists); 834 - float percent = 0.0; 835 - 836 - if (total) 837 - percent = h->stat.period * 100.0 / total; 811 + float percent = hist_entry__get_percent_limit(h); 838 812 839 813 if (!h->filtered && percent >= min_pcnt) 840 814 return nd; ··· 841 825 } 842 826 843 827 static struct rb_node *hists__filter_prev_entries(struct rb_node *nd, 844 - struct hists *hists, 845 828 float min_pcnt) 846 829 { 847 830 while (nd != NULL) { 848 831 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 849 - u64 total = hists__total_period(hists); 850 - float percent = 0.0; 851 - 852 - if (total) 853 - percent = h->stat.period * 100.0 / total; 832 + float percent = hist_entry__get_percent_limit(h); 854 833 855 834 if (!h->filtered && percent >= min_pcnt) 856 835 return nd; ··· 874 863 switch (whence) { 875 864 case SEEK_SET: 876 865 nd = hists__filter_entries(rb_first(browser->entries), 877 - hb->hists, hb->min_pcnt); 
866 + hb->min_pcnt); 878 867 break; 879 868 case SEEK_CUR: 880 869 nd = browser->top; 881 870 goto do_offset; 882 871 case SEEK_END: 883 872 nd = hists__filter_prev_entries(rb_last(browser->entries), 884 - hb->hists, hb->min_pcnt); 873 + hb->min_pcnt); 885 874 first = false; 886 875 break; 887 876 default: ··· 924 913 break; 925 914 } 926 915 } 927 - nd = hists__filter_entries(rb_next(nd), hb->hists, 928 - hb->min_pcnt); 916 + nd = hists__filter_entries(rb_next(nd), hb->min_pcnt); 929 917 if (nd == NULL) 930 918 break; 931 919 --offset; ··· 957 947 } 958 948 } 959 949 960 - nd = hists__filter_prev_entries(rb_prev(nd), hb->hists, 950 + nd = hists__filter_prev_entries(rb_prev(nd), 961 951 hb->min_pcnt); 962 952 if (nd == NULL) 963 953 break; ··· 1136 1126 static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp) 1137 1127 { 1138 1128 struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries), 1139 - browser->hists, 1140 1129 browser->min_pcnt); 1141 1130 int printed = 0; 1142 1131 ··· 1143 1134 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 1144 1135 1145 1136 printed += hist_browser__fprintf_entry(browser, h, fp); 1146 - nd = hists__filter_entries(rb_next(nd), browser->hists, 1147 - browser->min_pcnt); 1137 + nd = hists__filter_entries(rb_next(nd), browser->min_pcnt); 1148 1138 } 1149 1139 1150 1140 return printed; ··· 1380 1372 return; 1381 1373 } 1382 1374 1383 - while ((nd = hists__filter_entries(nd, hb->hists, 1384 - hb->min_pcnt)) != NULL) { 1375 + while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) { 1385 1376 nr_entries++; 1386 1377 nd = rb_next(nd); 1387 1378 } ··· 1706 1699 zoom_out_dso: 1707 1700 ui_helpline__pop(); 1708 1701 browser->hists->dso_filter = NULL; 1709 - sort_dso.elide = false; 1702 + perf_hpp__set_elide(HISTC_DSO, false); 1710 1703 } else { 1711 1704 if (dso == NULL) 1712 1705 continue; 1713 1706 ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", 1714 1707 
dso->kernel ? "the Kernel" : dso->short_name); 1715 1708 browser->hists->dso_filter = dso; 1716 - sort_dso.elide = true; 1709 + perf_hpp__set_elide(HISTC_DSO, true); 1717 1710 pstack__push(fstack, &browser->hists->dso_filter); 1718 1711 } 1719 1712 hists__filter_by_dso(hists); ··· 1725 1718 zoom_out_thread: 1726 1719 ui_helpline__pop(); 1727 1720 browser->hists->thread_filter = NULL; 1728 - sort_thread.elide = false; 1721 + perf_hpp__set_elide(HISTC_THREAD, false); 1729 1722 } else { 1730 1723 ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", 1731 1724 thread->comm_set ? thread__comm_str(thread) : "", 1732 1725 thread->tid); 1733 1726 browser->hists->thread_filter = thread; 1734 - sort_thread.elide = true; 1727 + perf_hpp__set_elide(HISTC_THREAD, false); 1735 1728 pstack__push(fstack, &browser->hists->thread_filter); 1736 1729 } 1737 1730 hists__filter_by_thread(hists);
+28 -5
tools/perf/ui/gtk/hists.c
··· 47 47 __percent_color_snprintf, true); \ 48 48 } 49 49 50 + #define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ 51 + static u64 he_get_acc_##_field(struct hist_entry *he) \ 52 + { \ 53 + return he->stat_acc->_field; \ 54 + } \ 55 + \ 56 + static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ 57 + struct perf_hpp *hpp, \ 58 + struct hist_entry *he) \ 59 + { \ 60 + return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \ 61 + __percent_color_snprintf, true); \ 62 + } 63 + 50 64 __HPP_COLOR_PERCENT_FN(overhead, period) 51 65 __HPP_COLOR_PERCENT_FN(overhead_sys, period_sys) 52 66 __HPP_COLOR_PERCENT_FN(overhead_us, period_us) 53 67 __HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys) 54 68 __HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us) 69 + __HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period) 55 70 56 71 #undef __HPP_COLOR_PERCENT_FN 57 72 ··· 83 68 perf_gtk__hpp_color_overhead_guest_sys; 84 69 perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color = 85 70 perf_gtk__hpp_color_overhead_guest_us; 71 + perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color = 72 + perf_gtk__hpp_color_overhead_acc; 86 73 } 87 74 88 75 static void callchain_list__sym_name(struct callchain_list *cl, ··· 198 181 if (perf_hpp__should_skip(fmt)) 199 182 continue; 200 183 184 + /* 185 + * XXX no way to determine where symcol column is.. 186 + * Just use last column for now. 
187 + */ 188 + if (perf_hpp__is_sort_entry(fmt)) 189 + sym_col = col_idx; 190 + 201 191 fmt->header(fmt, &hpp, hists_to_evsel(hists)); 202 192 203 193 gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view), ··· 233 209 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 234 210 GtkTreeIter iter; 235 211 u64 total = hists__total_period(h->hists); 236 - float percent = 0.0; 212 + float percent; 237 213 238 214 if (h->filtered) 239 215 continue; 240 216 241 - if (total) 242 - percent = h->stat.period * 100.0 / total; 243 - 217 + percent = hist_entry__get_percent_limit(h); 244 218 if (percent < min_pcnt) 245 219 continue; 246 220 ··· 260 238 261 239 if (symbol_conf.use_callchain && sort__has_sym) { 262 240 if (callchain_param.mode == CHAIN_GRAPH_REL) 263 - total = h->stat.period; 241 + total = symbol_conf.cumulate_callchain ? 242 + h->stat_acc->period : h->stat.period; 264 243 265 244 perf_gtk__add_callchain(&h->sorted_chain, store, &iter, 266 245 sym_col, total);
+119
tools/perf/ui/hist.c
··· 104 104 return ret; 105 105 } 106 106 107 + int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry *he, 108 + hpp_field_fn get_field, const char *fmt, 109 + hpp_snprint_fn print_fn, bool fmt_percent) 110 + { 111 + if (!symbol_conf.cumulate_callchain) { 112 + return snprintf(hpp->buf, hpp->size, "%*s", 113 + fmt_percent ? 8 : 12, "N/A"); 114 + } 115 + 116 + return __hpp__fmt(hpp, he, get_field, fmt, print_fn, fmt_percent); 117 + } 118 + 107 119 static int field_cmp(u64 field_a, u64 field_b) 108 120 { 109 121 if (field_a > field_b) ··· 169 157 free(fields_a); 170 158 free(fields_b); 171 159 160 + return ret; 161 + } 162 + 163 + static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b, 164 + hpp_field_fn get_field) 165 + { 166 + s64 ret = 0; 167 + 168 + if (symbol_conf.cumulate_callchain) { 169 + /* 170 + * Put caller above callee when they have equal period. 171 + */ 172 + ret = field_cmp(get_field(a), get_field(b)); 173 + if (ret) 174 + return ret; 175 + 176 + ret = b->callchain->max_depth - a->callchain->max_depth; 177 + } 172 178 return ret; 173 179 } 174 180 ··· 272 242 return __hpp__sort(a, b, he_get_##_field); \ 273 243 } 274 244 245 + #define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ 246 + static u64 he_get_acc_##_field(struct hist_entry *he) \ 247 + { \ 248 + return he->stat_acc->_field; \ 249 + } \ 250 + \ 251 + static int hpp__color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \ 252 + struct perf_hpp *hpp, struct hist_entry *he) \ 253 + { \ 254 + return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, " %6.2f%%", \ 255 + hpp_color_scnprintf, true); \ 256 + } 257 + 258 + #define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \ 259 + static int hpp__entry_##_type(struct perf_hpp_fmt *_fmt __maybe_unused, \ 260 + struct perf_hpp *hpp, struct hist_entry *he) \ 261 + { \ 262 + const char *fmt = symbol_conf.field_sep ? 
" %.2f" : " %6.2f%%"; \ 263 + return __hpp__fmt_acc(hpp, he, he_get_acc_##_field, fmt, \ 264 + hpp_entry_scnprintf, true); \ 265 + } 266 + 267 + #define __HPP_SORT_ACC_FN(_type, _field) \ 268 + static int64_t hpp__sort_##_type(struct hist_entry *a, struct hist_entry *b) \ 269 + { \ 270 + return __hpp__sort_acc(a, b, he_get_acc_##_field); \ 271 + } 272 + 275 273 #define __HPP_ENTRY_RAW_FN(_type, _field) \ 276 274 static u64 he_get_raw_##_field(struct hist_entry *he) \ 277 275 { \ ··· 328 270 __HPP_ENTRY_PERCENT_FN(_type, _field) \ 329 271 __HPP_SORT_FN(_type, _field) 330 272 273 + #define HPP_PERCENT_ACC_FNS(_type, _str, _field, _min_width, _unit_width)\ 274 + __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ 275 + __HPP_WIDTH_FN(_type, _min_width, _unit_width) \ 276 + __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \ 277 + __HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \ 278 + __HPP_SORT_ACC_FN(_type, _field) 279 + 331 280 #define HPP_RAW_FNS(_type, _str, _field, _min_width, _unit_width) \ 332 281 __HPP_HEADER_FN(_type, _str, _min_width, _unit_width) \ 333 282 __HPP_WIDTH_FN(_type, _min_width, _unit_width) \ 334 283 __HPP_ENTRY_RAW_FN(_type, _field) \ 335 284 __HPP_SORT_RAW_FN(_type, _field) 336 285 286 + __HPP_HEADER_FN(overhead_self, "Self", 8, 8) 337 287 338 288 HPP_PERCENT_FNS(overhead, "Overhead", period, 8, 8) 339 289 HPP_PERCENT_FNS(overhead_sys, "sys", period_sys, 8, 8) 340 290 HPP_PERCENT_FNS(overhead_us, "usr", period_us, 8, 8) 341 291 HPP_PERCENT_FNS(overhead_guest_sys, "guest sys", period_guest_sys, 9, 8) 342 292 HPP_PERCENT_FNS(overhead_guest_us, "guest usr", period_guest_us, 9, 8) 293 + HPP_PERCENT_ACC_FNS(overhead_acc, "Children", period, 8, 8) 343 294 344 295 HPP_RAW_FNS(samples, "Samples", nr_events, 12, 12) 345 296 HPP_RAW_FNS(period, "Period", period, 12, 12) ··· 360 293 } 361 294 362 295 #define HPP__COLOR_PRINT_FNS(_name) \ 296 + { \ 297 + .header = hpp__header_ ## _name, \ 298 + .width = hpp__width_ ## _name, \ 299 + .color = hpp__color_ ## _name, 
\ 300 + .entry = hpp__entry_ ## _name, \ 301 + .cmp = hpp__nop_cmp, \ 302 + .collapse = hpp__nop_cmp, \ 303 + .sort = hpp__sort_ ## _name, \ 304 + } 305 + 306 + #define HPP__COLOR_ACC_PRINT_FNS(_name) \ 363 307 { \ 364 308 .header = hpp__header_ ## _name, \ 365 309 .width = hpp__width_ ## _name, \ ··· 397 319 HPP__COLOR_PRINT_FNS(overhead_us), 398 320 HPP__COLOR_PRINT_FNS(overhead_guest_sys), 399 321 HPP__COLOR_PRINT_FNS(overhead_guest_us), 322 + HPP__COLOR_ACC_PRINT_FNS(overhead_acc), 400 323 HPP__PRINT_FNS(samples), 401 324 HPP__PRINT_FNS(period) 402 325 }; ··· 407 328 408 329 409 330 #undef HPP__COLOR_PRINT_FNS 331 + #undef HPP__COLOR_ACC_PRINT_FNS 410 332 #undef HPP__PRINT_FNS 411 333 412 334 #undef HPP_PERCENT_FNS 335 + #undef HPP_PERCENT_ACC_FNS 413 336 #undef HPP_RAW_FNS 414 337 415 338 #undef __HPP_HEADER_FN 416 339 #undef __HPP_WIDTH_FN 417 340 #undef __HPP_COLOR_PERCENT_FN 418 341 #undef __HPP_ENTRY_PERCENT_FN 342 + #undef __HPP_COLOR_ACC_PERCENT_FN 343 + #undef __HPP_ENTRY_ACC_PERCENT_FN 419 344 #undef __HPP_ENTRY_RAW_FN 345 + #undef __HPP_SORT_FN 346 + #undef __HPP_SORT_ACC_FN 347 + #undef __HPP_SORT_RAW_FN 420 348 421 349 422 350 void perf_hpp__init(void) ··· 447 361 if (field_order) 448 362 return; 449 363 364 + if (symbol_conf.cumulate_callchain) { 365 + perf_hpp__column_enable(PERF_HPP__OVERHEAD_ACC); 366 + 367 + perf_hpp__format[PERF_HPP__OVERHEAD].header = 368 + hpp__header_overhead_self; 369 + } 370 + 450 371 perf_hpp__column_enable(PERF_HPP__OVERHEAD); 451 372 452 373 if (symbol_conf.show_cpu_utilization) { ··· 476 383 list = &perf_hpp__format[PERF_HPP__OVERHEAD].sort_list; 477 384 if (list_empty(list)) 478 385 list_add(list, &perf_hpp__sort_list); 386 + 387 + if (symbol_conf.cumulate_callchain) { 388 + list = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC].sort_list; 389 + if (list_empty(list)) 390 + list_add(list, &perf_hpp__sort_list); 391 + } 479 392 } 480 393 481 394 void perf_hpp__column_register(struct perf_hpp_fmt *format) 482 395 { 483 396 
list_add_tail(&format->list, &perf_hpp__list); 397 + } 398 + 399 + void perf_hpp__column_unregister(struct perf_hpp_fmt *format) 400 + { 401 + list_del(&format->list); 484 402 } 485 403 486 404 void perf_hpp__register_sort_field(struct perf_hpp_fmt *format) ··· 503 399 { 504 400 BUG_ON(col >= PERF_HPP__MAX_INDEX); 505 401 perf_hpp__column_register(&perf_hpp__format[col]); 402 + } 403 + 404 + void perf_hpp__column_disable(unsigned col) 405 + { 406 + BUG_ON(col >= PERF_HPP__MAX_INDEX); 407 + perf_hpp__column_unregister(&perf_hpp__format[col]); 408 + } 409 + 410 + void perf_hpp__cancel_cumulate(void) 411 + { 412 + if (field_order) 413 + return; 414 + 415 + perf_hpp__column_disable(PERF_HPP__OVERHEAD_ACC); 416 + perf_hpp__format[PERF_HPP__OVERHEAD].header = hpp__header_overhead; 506 417 } 507 418 508 419 void perf_hpp__setup_output_field(void)
+5 -3
tools/perf/ui/stdio/hist.c
··· 271 271 { 272 272 switch (callchain_param.mode) { 273 273 case CHAIN_GRAPH_REL: 274 - return callchain__fprintf_graph(fp, &he->sorted_chain, he->stat.period, 274 + return callchain__fprintf_graph(fp, &he->sorted_chain, 275 + symbol_conf.cumulate_callchain ? 276 + he->stat_acc->period : he->stat.period, 275 277 left_margin); 276 278 break; 277 279 case CHAIN_GRAPH_ABS: ··· 463 461 464 462 for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { 465 463 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 466 - float percent = h->stat.period * 100.0 / 467 - hists->stats.total_period; 464 + float percent; 468 465 469 466 if (h->filtered) 470 467 continue; 471 468 469 + percent = hist_entry__get_percent_limit(h); 472 470 if (percent < min_pcnt) 473 471 continue; 474 472
+44 -1
tools/perf/util/callchain.c
··· 616 616 if (sample->callchain == NULL) 617 617 return 0; 618 618 619 - if (symbol_conf.use_callchain || sort__has_parent) { 619 + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain || 620 + sort__has_parent) { 620 621 return machine__resolve_callchain(al->machine, evsel, al->thread, 621 622 sample, parent, al, max_stack); 622 623 } ··· 629 628 if (!symbol_conf.use_callchain) 630 629 return 0; 631 630 return callchain_append(he->callchain, &callchain_cursor, sample->period); 631 + } 632 + 633 + int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, 634 + bool hide_unresolved) 635 + { 636 + al->map = node->map; 637 + al->sym = node->sym; 638 + if (node->map) 639 + al->addr = node->map->map_ip(node->map, node->ip); 640 + else 641 + al->addr = node->ip; 642 + 643 + if (al->sym == NULL) { 644 + if (hide_unresolved) 645 + return 0; 646 + if (al->map == NULL) 647 + goto out; 648 + } 649 + 650 + if (al->map->groups == &al->machine->kmaps) { 651 + if (machine__is_host(al->machine)) { 652 + al->cpumode = PERF_RECORD_MISC_KERNEL; 653 + al->level = 'k'; 654 + } else { 655 + al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL; 656 + al->level = 'g'; 657 + } 658 + } else { 659 + if (machine__is_host(al->machine)) { 660 + al->cpumode = PERF_RECORD_MISC_USER; 661 + al->level = '.'; 662 + } else if (perf_guest) { 663 + al->cpumode = PERF_RECORD_MISC_GUEST_USER; 664 + al->level = 'u'; 665 + } else { 666 + al->cpumode = PERF_RECORD_MISC_HYPERVISOR; 667 + al->level = 'H'; 668 + } 669 + } 670 + 671 + out: 672 + return 1; 632 673 }
+11
tools/perf/util/callchain.h
··· 162 162 struct perf_evsel *evsel, struct addr_location *al, 163 163 int max_stack); 164 164 int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample); 165 + int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, 166 + bool hide_unresolved); 165 167 166 168 extern const char record_callchain_help[]; 167 169 int parse_callchain_report_opt(const char *arg); 170 + 171 + static inline void callchain_cursor_snapshot(struct callchain_cursor *dest, 172 + struct callchain_cursor *src) 173 + { 174 + *dest = *src; 175 + 176 + dest->first = src->curr; 177 + dest->nr -= src->pos; 178 + } 168 179 #endif /* __PERF_CALLCHAIN_H */
+471 -10
tools/perf/util/hist.c
··· 4 4 #include "session.h" 5 5 #include "sort.h" 6 6 #include "evsel.h" 7 + #include "annotate.h" 7 8 #include <math.h> 8 9 9 10 static bool hists__filter_entry_by_dso(struct hists *hists, ··· 232 231 return true; 233 232 234 233 he_stat__decay(&he->stat); 234 + if (symbol_conf.cumulate_callchain) 235 + he_stat__decay(he->stat_acc); 235 236 236 237 diff = prev_period - he->stat.period; 237 238 ··· 279 276 * histogram, sorted on item, collects periods 280 277 */ 281 278 282 - static struct hist_entry *hist_entry__new(struct hist_entry *template) 279 + static struct hist_entry *hist_entry__new(struct hist_entry *template, 280 + bool sample_self) 283 281 { 284 - size_t callchain_size = symbol_conf.use_callchain ? sizeof(struct callchain_root) : 0; 285 - struct hist_entry *he = zalloc(sizeof(*he) + callchain_size); 282 + size_t callchain_size = 0; 283 + struct hist_entry *he; 284 + 285 + if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) 286 + callchain_size = sizeof(struct callchain_root); 287 + 288 + he = zalloc(sizeof(*he) + callchain_size); 286 289 287 290 if (he != NULL) { 288 291 *he = *template; 292 + 293 + if (symbol_conf.cumulate_callchain) { 294 + he->stat_acc = malloc(sizeof(he->stat)); 295 + if (he->stat_acc == NULL) { 296 + free(he); 297 + return NULL; 298 + } 299 + memcpy(he->stat_acc, &he->stat, sizeof(he->stat)); 300 + if (!sample_self) 301 + memset(&he->stat, 0, sizeof(he->stat)); 302 + } 289 303 290 304 if (he->ms.map) 291 305 he->ms.map->referenced = true; ··· 315 295 */ 316 296 he->branch_info = malloc(sizeof(*he->branch_info)); 317 297 if (he->branch_info == NULL) { 298 + free(he->stat_acc); 318 299 free(he); 319 300 return NULL; 320 301 } ··· 354 333 355 334 static struct hist_entry *add_hist_entry(struct hists *hists, 356 335 struct hist_entry *entry, 357 - struct addr_location *al) 336 + struct addr_location *al, 337 + bool sample_self) 358 338 { 359 339 struct rb_node **p; 360 340 struct rb_node *parent = NULL; ··· 379 357 cmp = 
hist_entry__cmp(he, entry); 380 358 381 359 if (!cmp) { 382 - he_stat__add_period(&he->stat, period, weight); 360 + if (sample_self) 361 + he_stat__add_period(&he->stat, period, weight); 362 + if (symbol_conf.cumulate_callchain) 363 + he_stat__add_period(he->stat_acc, period, weight); 383 364 384 365 /* 385 366 * This mem info was allocated from sample__resolve_mem ··· 410 385 p = &(*p)->rb_right; 411 386 } 412 387 413 - he = hist_entry__new(entry); 388 + he = hist_entry__new(entry, sample_self); 414 389 if (!he) 415 390 return NULL; 416 391 417 392 rb_link_node(&he->rb_node_in, parent, p); 418 393 rb_insert_color(&he->rb_node_in, hists->entries_in); 419 394 out: 420 - he_stat__add_cpumode_period(&he->stat, al->cpumode, period); 395 + if (sample_self) 396 + he_stat__add_cpumode_period(&he->stat, al->cpumode, period); 397 + if (symbol_conf.cumulate_callchain) 398 + he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period); 421 399 return he; 422 400 } 423 401 ··· 429 401 struct symbol *sym_parent, 430 402 struct branch_info *bi, 431 403 struct mem_info *mi, 432 - u64 period, u64 weight, u64 transaction) 404 + u64 period, u64 weight, u64 transaction, 405 + bool sample_self) 433 406 { 434 407 struct hist_entry entry = { 435 408 .thread = al->thread, ··· 455 426 .transaction = transaction, 456 427 }; 457 428 458 - return add_hist_entry(hists, &entry, al); 429 + return add_hist_entry(hists, &entry, al, sample_self); 430 + } 431 + 432 + static int 433 + iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused, 434 + struct addr_location *al __maybe_unused) 435 + { 436 + return 0; 437 + } 438 + 439 + static int 440 + iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused, 441 + struct addr_location *al __maybe_unused) 442 + { 443 + return 0; 444 + } 445 + 446 + static int 447 + iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) 448 + { 449 + struct perf_sample *sample = iter->sample; 450 + struct mem_info *mi; 451 + 452 
+ mi = sample__resolve_mem(sample, al); 453 + if (mi == NULL) 454 + return -ENOMEM; 455 + 456 + iter->priv = mi; 457 + return 0; 458 + } 459 + 460 + static int 461 + iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al) 462 + { 463 + u64 cost; 464 + struct mem_info *mi = iter->priv; 465 + struct hist_entry *he; 466 + 467 + if (mi == NULL) 468 + return -EINVAL; 469 + 470 + cost = iter->sample->weight; 471 + if (!cost) 472 + cost = 1; 473 + 474 + /* 475 + * must pass period=weight in order to get the correct 476 + * sorting from hists__collapse_resort() which is solely 477 + * based on periods. We want sorting be done on nr_events * weight 478 + * and this is indirectly achieved by passing period=weight here 479 + * and the he_stat__add_period() function. 480 + */ 481 + he = __hists__add_entry(&iter->evsel->hists, al, iter->parent, NULL, mi, 482 + cost, cost, 0, true); 483 + if (!he) 484 + return -ENOMEM; 485 + 486 + iter->he = he; 487 + return 0; 488 + } 489 + 490 + static int 491 + iter_finish_mem_entry(struct hist_entry_iter *iter, 492 + struct addr_location *al __maybe_unused) 493 + { 494 + struct perf_evsel *evsel = iter->evsel; 495 + struct hist_entry *he = iter->he; 496 + int err = -EINVAL; 497 + 498 + if (he == NULL) 499 + goto out; 500 + 501 + hists__inc_nr_samples(&evsel->hists, he->filtered); 502 + 503 + err = hist_entry__append_callchain(he, iter->sample); 504 + 505 + out: 506 + /* 507 + * We don't need to free iter->priv (mem_info) here since 508 + * the mem info was either already freed in add_hist_entry() or 509 + * passed to a new hist entry by hist_entry__new(). 
510 + */ 511 + iter->priv = NULL; 512 + 513 + iter->he = NULL; 514 + return err; 515 + } 516 + 517 + static int 518 + iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) 519 + { 520 + struct branch_info *bi; 521 + struct perf_sample *sample = iter->sample; 522 + 523 + bi = sample__resolve_bstack(sample, al); 524 + if (!bi) 525 + return -ENOMEM; 526 + 527 + iter->curr = 0; 528 + iter->total = sample->branch_stack->nr; 529 + 530 + iter->priv = bi; 531 + return 0; 532 + } 533 + 534 + static int 535 + iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused, 536 + struct addr_location *al __maybe_unused) 537 + { 538 + /* to avoid calling callback function */ 539 + iter->he = NULL; 540 + 541 + return 0; 542 + } 543 + 544 + static int 545 + iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) 546 + { 547 + struct branch_info *bi = iter->priv; 548 + int i = iter->curr; 549 + 550 + if (bi == NULL) 551 + return 0; 552 + 553 + if (iter->curr >= iter->total) 554 + return 0; 555 + 556 + al->map = bi[i].to.map; 557 + al->sym = bi[i].to.sym; 558 + al->addr = bi[i].to.addr; 559 + return 1; 560 + } 561 + 562 + static int 563 + iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al) 564 + { 565 + struct branch_info *bi; 566 + struct perf_evsel *evsel = iter->evsel; 567 + struct hist_entry *he = NULL; 568 + int i = iter->curr; 569 + int err = 0; 570 + 571 + bi = iter->priv; 572 + 573 + if (iter->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) 574 + goto out; 575 + 576 + /* 577 + * The report shows the percentage of total branches captured 578 + * and not events sampled. Thus we use a pseudo period of 1. 
579 + */ 580 + he = __hists__add_entry(&evsel->hists, al, iter->parent, &bi[i], NULL, 581 + 1, 1, 0, true); 582 + if (he == NULL) 583 + return -ENOMEM; 584 + 585 + hists__inc_nr_samples(&evsel->hists, he->filtered); 586 + 587 + out: 588 + iter->he = he; 589 + iter->curr++; 590 + return err; 591 + } 592 + 593 + static int 594 + iter_finish_branch_entry(struct hist_entry_iter *iter, 595 + struct addr_location *al __maybe_unused) 596 + { 597 + zfree(&iter->priv); 598 + iter->he = NULL; 599 + 600 + return iter->curr >= iter->total ? 0 : -1; 601 + } 602 + 603 + static int 604 + iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused, 605 + struct addr_location *al __maybe_unused) 606 + { 607 + return 0; 608 + } 609 + 610 + static int 611 + iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al) 612 + { 613 + struct perf_evsel *evsel = iter->evsel; 614 + struct perf_sample *sample = iter->sample; 615 + struct hist_entry *he; 616 + 617 + he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, 618 + sample->period, sample->weight, 619 + sample->transaction, true); 620 + if (he == NULL) 621 + return -ENOMEM; 622 + 623 + iter->he = he; 624 + return 0; 625 + } 626 + 627 + static int 628 + iter_finish_normal_entry(struct hist_entry_iter *iter, 629 + struct addr_location *al __maybe_unused) 630 + { 631 + struct hist_entry *he = iter->he; 632 + struct perf_evsel *evsel = iter->evsel; 633 + struct perf_sample *sample = iter->sample; 634 + 635 + if (he == NULL) 636 + return 0; 637 + 638 + iter->he = NULL; 639 + 640 + hists__inc_nr_samples(&evsel->hists, he->filtered); 641 + 642 + return hist_entry__append_callchain(he, sample); 643 + } 644 + 645 + static int 646 + iter_prepare_cumulative_entry(struct hist_entry_iter *iter __maybe_unused, 647 + struct addr_location *al __maybe_unused) 648 + { 649 + struct hist_entry **he_cache; 650 + 651 + callchain_cursor_commit(&callchain_cursor); 652 + 653 + /* 654 + * This is for detecting 
cycles or recursions so that they're 655 + * cumulated only one time to prevent entries more than 100% 656 + * overhead. 657 + */ 658 + he_cache = malloc(sizeof(*he_cache) * (PERF_MAX_STACK_DEPTH + 1)); 659 + if (he_cache == NULL) 660 + return -ENOMEM; 661 + 662 + iter->priv = he_cache; 663 + iter->curr = 0; 664 + 665 + return 0; 666 + } 667 + 668 + static int 669 + iter_add_single_cumulative_entry(struct hist_entry_iter *iter, 670 + struct addr_location *al) 671 + { 672 + struct perf_evsel *evsel = iter->evsel; 673 + struct perf_sample *sample = iter->sample; 674 + struct hist_entry **he_cache = iter->priv; 675 + struct hist_entry *he; 676 + int err = 0; 677 + 678 + he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, 679 + sample->period, sample->weight, 680 + sample->transaction, true); 681 + if (he == NULL) 682 + return -ENOMEM; 683 + 684 + iter->he = he; 685 + he_cache[iter->curr++] = he; 686 + 687 + callchain_append(he->callchain, &callchain_cursor, sample->period); 688 + 689 + /* 690 + * We need to re-initialize the cursor since callchain_append() 691 + * advanced the cursor to the end. 
692 + */ 693 + callchain_cursor_commit(&callchain_cursor); 694 + 695 + hists__inc_nr_samples(&evsel->hists, he->filtered); 696 + 697 + return err; 698 + } 699 + 700 + static int 701 + iter_next_cumulative_entry(struct hist_entry_iter *iter, 702 + struct addr_location *al) 703 + { 704 + struct callchain_cursor_node *node; 705 + 706 + node = callchain_cursor_current(&callchain_cursor); 707 + if (node == NULL) 708 + return 0; 709 + 710 + return fill_callchain_info(al, node, iter->hide_unresolved); 711 + } 712 + 713 + static int 714 + iter_add_next_cumulative_entry(struct hist_entry_iter *iter, 715 + struct addr_location *al) 716 + { 717 + struct perf_evsel *evsel = iter->evsel; 718 + struct perf_sample *sample = iter->sample; 719 + struct hist_entry **he_cache = iter->priv; 720 + struct hist_entry *he; 721 + struct hist_entry he_tmp = { 722 + .cpu = al->cpu, 723 + .thread = al->thread, 724 + .comm = thread__comm(al->thread), 725 + .ip = al->addr, 726 + .ms = { 727 + .map = al->map, 728 + .sym = al->sym, 729 + }, 730 + .parent = iter->parent, 731 + }; 732 + int i; 733 + struct callchain_cursor cursor; 734 + 735 + callchain_cursor_snapshot(&cursor, &callchain_cursor); 736 + 737 + callchain_cursor_advance(&callchain_cursor); 738 + 739 + /* 740 + * Check if there's duplicate entries in the callchain. 741 + * It's possible that it has cycles or recursive calls. 
742 + */ 743 + for (i = 0; i < iter->curr; i++) { 744 + if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) { 745 + /* to avoid calling callback function */ 746 + iter->he = NULL; 747 + return 0; 748 + } 749 + } 750 + 751 + he = __hists__add_entry(&evsel->hists, al, iter->parent, NULL, NULL, 752 + sample->period, sample->weight, 753 + sample->transaction, false); 754 + if (he == NULL) 755 + return -ENOMEM; 756 + 757 + iter->he = he; 758 + he_cache[iter->curr++] = he; 759 + 760 + callchain_append(he->callchain, &cursor, sample->period); 761 + return 0; 762 + } 763 + 764 + static int 765 + iter_finish_cumulative_entry(struct hist_entry_iter *iter, 766 + struct addr_location *al __maybe_unused) 767 + { 768 + zfree(&iter->priv); 769 + iter->he = NULL; 770 + 771 + return 0; 772 + } 773 + 774 + const struct hist_iter_ops hist_iter_mem = { 775 + .prepare_entry = iter_prepare_mem_entry, 776 + .add_single_entry = iter_add_single_mem_entry, 777 + .next_entry = iter_next_nop_entry, 778 + .add_next_entry = iter_add_next_nop_entry, 779 + .finish_entry = iter_finish_mem_entry, 780 + }; 781 + 782 + const struct hist_iter_ops hist_iter_branch = { 783 + .prepare_entry = iter_prepare_branch_entry, 784 + .add_single_entry = iter_add_single_branch_entry, 785 + .next_entry = iter_next_branch_entry, 786 + .add_next_entry = iter_add_next_branch_entry, 787 + .finish_entry = iter_finish_branch_entry, 788 + }; 789 + 790 + const struct hist_iter_ops hist_iter_normal = { 791 + .prepare_entry = iter_prepare_normal_entry, 792 + .add_single_entry = iter_add_single_normal_entry, 793 + .next_entry = iter_next_nop_entry, 794 + .add_next_entry = iter_add_next_nop_entry, 795 + .finish_entry = iter_finish_normal_entry, 796 + }; 797 + 798 + const struct hist_iter_ops hist_iter_cumulative = { 799 + .prepare_entry = iter_prepare_cumulative_entry, 800 + .add_single_entry = iter_add_single_cumulative_entry, 801 + .next_entry = iter_next_cumulative_entry, 802 + .add_next_entry = iter_add_next_cumulative_entry, 
803 + .finish_entry = iter_finish_cumulative_entry, 804 + }; 805 + 806 + int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, 807 + struct perf_evsel *evsel, struct perf_sample *sample, 808 + int max_stack_depth, void *arg) 809 + { 810 + int err, err2; 811 + 812 + err = sample__resolve_callchain(sample, &iter->parent, evsel, al, 813 + max_stack_depth); 814 + if (err) 815 + return err; 816 + 817 + iter->evsel = evsel; 818 + iter->sample = sample; 819 + 820 + err = iter->ops->prepare_entry(iter, al); 821 + if (err) 822 + goto out; 823 + 824 + err = iter->ops->add_single_entry(iter, al); 825 + if (err) 826 + goto out; 827 + 828 + if (iter->he && iter->add_entry_cb) { 829 + err = iter->add_entry_cb(iter, al, true, arg); 830 + if (err) 831 + goto out; 832 + } 833 + 834 + while (iter->ops->next_entry(iter, al)) { 835 + err = iter->ops->add_next_entry(iter, al); 836 + if (err) 837 + break; 838 + 839 + if (iter->he && iter->add_entry_cb) { 840 + err = iter->add_entry_cb(iter, al, false, arg); 841 + if (err) 842 + goto out; 843 + } 844 + } 845 + 846 + out: 847 + err2 = iter->ops->finish_entry(iter, al); 848 + if (!err) 849 + err = err2; 850 + 851 + return err; 459 852 } 460 853 461 854 int64_t ··· 920 469 { 921 470 zfree(&he->branch_info); 922 471 zfree(&he->mem_info); 472 + zfree(&he->stat_acc); 923 473 free_srcline(he->srcline); 924 474 free(he); 925 475 } ··· 946 494 947 495 if (!cmp) { 948 496 he_stat__add_stat(&iter->stat, &he->stat); 497 + if (symbol_conf.cumulate_callchain) 498 + he_stat__add_stat(iter->stat_acc, he->stat_acc); 949 499 950 500 if (symbol_conf.use_callchain) { 951 501 callchain_cursor_reset(&callchain_cursor); ··· 1254 800 events_stats__inc(&hists->stats, type); 1255 801 } 1256 802 803 + void hists__inc_nr_samples(struct hists *hists, bool filtered) 804 + { 805 + events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE); 806 + if (!filtered) 807 + hists->stats.nr_non_filtered_samples++; 808 + } 809 + 1257 810 static struct 
hist_entry *hists__add_dummy_entry(struct hists *hists, 1258 811 struct hist_entry *pair) 1259 812 { ··· 1292 831 p = &(*p)->rb_right; 1293 832 } 1294 833 1295 - he = hist_entry__new(pair); 834 + he = hist_entry__new(pair, true); 1296 835 if (he) { 1297 836 memset(&he->stat, 0, sizeof(he->stat)); 1298 837 he->hists = hists;
+55 -2
tools/perf/util/hist.h
··· 96 96 u16 col_len[HISTC_NR_COLS]; 97 97 }; 98 98 99 + struct hist_entry_iter; 100 + 101 + struct hist_iter_ops { 102 + int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *); 103 + int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *); 104 + int (*next_entry)(struct hist_entry_iter *, struct addr_location *); 105 + int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *); 106 + int (*finish_entry)(struct hist_entry_iter *, struct addr_location *); 107 + }; 108 + 109 + struct hist_entry_iter { 110 + int total; 111 + int curr; 112 + 113 + bool hide_unresolved; 114 + 115 + struct perf_evsel *evsel; 116 + struct perf_sample *sample; 117 + struct hist_entry *he; 118 + struct symbol *parent; 119 + void *priv; 120 + 121 + const struct hist_iter_ops *ops; 122 + /* user-defined callback function (optional) */ 123 + int (*add_entry_cb)(struct hist_entry_iter *iter, 124 + struct addr_location *al, bool single, void *arg); 125 + }; 126 + 127 + extern const struct hist_iter_ops hist_iter_normal; 128 + extern const struct hist_iter_ops hist_iter_branch; 129 + extern const struct hist_iter_ops hist_iter_mem; 130 + extern const struct hist_iter_ops hist_iter_cumulative; 131 + 99 132 struct hist_entry *__hists__add_entry(struct hists *hists, 100 133 struct addr_location *al, 101 134 struct symbol *parent, 102 135 struct branch_info *bi, 103 136 struct mem_info *mi, u64 period, 104 - u64 weight, u64 transaction); 137 + u64 weight, u64 transaction, 138 + bool sample_self); 139 + int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, 140 + struct perf_evsel *evsel, struct perf_sample *sample, 141 + int max_stack_depth, void *arg); 142 + 105 143 int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); 106 144 int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right); 107 145 int hist_entry__transaction_len(void); ··· 157 119 void hists__reset_stats(struct hists 
*hists); 158 120 void hists__inc_stats(struct hists *hists, struct hist_entry *h); 159 121 void hists__inc_nr_events(struct hists *hists, u32 type); 122 + void hists__inc_nr_samples(struct hists *hists, bool filtered); 160 123 void events_stats__inc(struct events_stats *stats, u32 type); 161 124 size_t events_stats__fprintf(struct events_stats *stats, FILE *fp); 162 125 ··· 205 166 206 167 struct list_head list; 207 168 struct list_head sort_list; 169 + bool elide; 208 170 }; 209 171 210 172 extern struct list_head perf_hpp__list; ··· 232 192 PERF_HPP__OVERHEAD_US, 233 193 PERF_HPP__OVERHEAD_GUEST_SYS, 234 194 PERF_HPP__OVERHEAD_GUEST_US, 195 + PERF_HPP__OVERHEAD_ACC, 235 196 PERF_HPP__SAMPLES, 236 197 PERF_HPP__PERIOD, 237 198 ··· 241 200 242 201 void perf_hpp__init(void); 243 202 void perf_hpp__column_register(struct perf_hpp_fmt *format); 203 + void perf_hpp__column_unregister(struct perf_hpp_fmt *format); 244 204 void perf_hpp__column_enable(unsigned col); 205 + void perf_hpp__column_disable(unsigned col); 206 + void perf_hpp__cancel_cumulate(void); 207 + 245 208 void perf_hpp__register_sort_field(struct perf_hpp_fmt *format); 246 209 void perf_hpp__setup_output_field(void); 247 210 void perf_hpp__reset_output_field(void); ··· 253 208 254 209 bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); 255 210 bool perf_hpp__same_sort_entry(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b); 256 - bool perf_hpp__should_skip(struct perf_hpp_fmt *format); 211 + 212 + static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format) 213 + { 214 + return format->elide; 215 + } 216 + 257 217 void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists); 258 218 259 219 typedef u64 (*hpp_field_fn)(struct hist_entry *he); ··· 268 218 int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he, 269 219 hpp_field_fn get_field, const char *fmt, 270 220 hpp_snprint_fn print_fn, bool fmt_percent); 221 + int __hpp__fmt_acc(struct perf_hpp *hpp, struct hist_entry 
*he, 222 + hpp_field_fn get_field, const char *fmt, 223 + hpp_snprint_fn print_fn, bool fmt_percent); 271 224 272 225 static inline void advance_hpp(struct perf_hpp *hpp, int inc) 273 226 {
+59 -46
tools/perf/util/sort.c
··· 1061 1061 DIM(PERF_HPP__OVERHEAD_US, "overhead_us"), 1062 1062 DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"), 1063 1063 DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"), 1064 + DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"), 1064 1065 DIM(PERF_HPP__SAMPLES, "sample"), 1065 1066 DIM(PERF_HPP__PERIOD, "period"), 1066 1067 }; ··· 1157 1156 1158 1157 INIT_LIST_HEAD(&hse->hpp.list); 1159 1158 INIT_LIST_HEAD(&hse->hpp.sort_list); 1159 + hse->hpp.elide = false; 1160 1160 1161 1161 return hse; 1162 1162 } ··· 1365 1363 return ret; 1366 1364 } 1367 1365 1368 - bool perf_hpp__should_skip(struct perf_hpp_fmt *format) 1366 + void perf_hpp__set_elide(int idx, bool elide) 1369 1367 { 1370 - if (perf_hpp__is_sort_entry(format)) { 1371 - struct hpp_sort_entry *hse; 1368 + struct perf_hpp_fmt *fmt; 1369 + struct hpp_sort_entry *hse; 1372 1370 1373 - hse = container_of(format, struct hpp_sort_entry, hpp); 1374 - return hse->se->elide; 1371 + perf_hpp__for_each_format(fmt) { 1372 + if (!perf_hpp__is_sort_entry(fmt)) 1373 + continue; 1374 + 1375 + hse = container_of(fmt, struct hpp_sort_entry, hpp); 1376 + if (hse->se->se_width_idx == idx) { 1377 + fmt->elide = elide; 1378 + break; 1379 + } 1375 1380 } 1376 - return false; 1377 1381 } 1378 1382 1379 - static void sort_entry__setup_elide(struct sort_entry *se, 1380 - struct strlist *list, 1381 - const char *list_name, FILE *fp) 1383 + static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp) 1382 1384 { 1383 1385 if (list && strlist__nr_entries(list) == 1) { 1384 1386 if (fp != NULL) 1385 1387 fprintf(fp, "# %s: %s\n", list_name, 1386 1388 strlist__entry(list, 0)->s); 1387 - se->elide = true; 1389 + return true; 1388 1390 } 1391 + return false; 1392 + } 1393 + 1394 + static bool get_elide(int idx, FILE *output) 1395 + { 1396 + switch (idx) { 1397 + case HISTC_SYMBOL: 1398 + return __get_elide(symbol_conf.sym_list, "symbol", output); 1399 + case HISTC_DSO: 1400 + return 
__get_elide(symbol_conf.dso_list, "dso", output); 1401 + case HISTC_COMM: 1402 + return __get_elide(symbol_conf.comm_list, "comm", output); 1403 + default: 1404 + break; 1405 + } 1406 + 1407 + if (sort__mode != SORT_MODE__BRANCH) 1408 + return false; 1409 + 1410 + switch (idx) { 1411 + case HISTC_SYMBOL_FROM: 1412 + return __get_elide(symbol_conf.sym_from_list, "sym_from", output); 1413 + case HISTC_SYMBOL_TO: 1414 + return __get_elide(symbol_conf.sym_to_list, "sym_to", output); 1415 + case HISTC_DSO_FROM: 1416 + return __get_elide(symbol_conf.dso_from_list, "dso_from", output); 1417 + case HISTC_DSO_TO: 1418 + return __get_elide(symbol_conf.dso_to_list, "dso_to", output); 1419 + default: 1420 + break; 1421 + } 1422 + 1423 + return false; 1389 1424 } 1390 1425 1391 1426 void sort__setup_elide(FILE *output) ··· 1430 1391 struct perf_hpp_fmt *fmt; 1431 1392 struct hpp_sort_entry *hse; 1432 1393 1433 - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, 1434 - "dso", output); 1435 - sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, 1436 - "comm", output); 1437 - sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, 1438 - "symbol", output); 1394 + perf_hpp__for_each_format(fmt) { 1395 + if (!perf_hpp__is_sort_entry(fmt)) 1396 + continue; 1439 1397 1440 - if (sort__mode == SORT_MODE__BRANCH) { 1441 - sort_entry__setup_elide(&sort_dso_from, 1442 - symbol_conf.dso_from_list, 1443 - "dso_from", output); 1444 - sort_entry__setup_elide(&sort_dso_to, 1445 - symbol_conf.dso_to_list, 1446 - "dso_to", output); 1447 - sort_entry__setup_elide(&sort_sym_from, 1448 - symbol_conf.sym_from_list, 1449 - "sym_from", output); 1450 - sort_entry__setup_elide(&sort_sym_to, 1451 - symbol_conf.sym_to_list, 1452 - "sym_to", output); 1453 - } else if (sort__mode == SORT_MODE__MEMORY) { 1454 - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, 1455 - "symbol_daddr", output); 1456 - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, 1457 - "dso_daddr", output); 1458 
- sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, 1459 - "mem", output); 1460 - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, 1461 - "local_weight", output); 1462 - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, 1463 - "tlb", output); 1464 - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, 1465 - "snoop", output); 1398 + hse = container_of(fmt, struct hpp_sort_entry, hpp); 1399 + fmt->elide = get_elide(hse->se->se_width_idx, output); 1466 1400 } 1467 1401 1468 1402 /* ··· 1446 1434 if (!perf_hpp__is_sort_entry(fmt)) 1447 1435 continue; 1448 1436 1449 - hse = container_of(fmt, struct hpp_sort_entry, hpp); 1450 - if (!hse->se->elide) 1437 + if (!fmt->elide) 1451 1438 return; 1452 1439 } 1453 1440 ··· 1454 1443 if (!perf_hpp__is_sort_entry(fmt)) 1455 1444 continue; 1456 1445 1457 - hse = container_of(fmt, struct hpp_sort_entry, hpp); 1458 - hse->se->elide = false; 1446 + fmt->elide = false; 1459 1447 } 1460 1448 } 1461 1449 ··· 1590 1580 sort__has_parent = 0; 1591 1581 sort__has_sym = 0; 1592 1582 sort__has_dso = 0; 1583 + 1584 + field_order = NULL; 1585 + sort_order = NULL; 1593 1586 1594 1587 reset_dimensions(); 1595 1588 perf_hpp__reset_output_field();
+18 -2
tools/perf/util/sort.h
··· 20 20 21 21 #include "parse-options.h" 22 22 #include "parse-events.h" 23 - 23 + #include "hist.h" 24 24 #include "thread.h" 25 25 26 26 extern regex_t parent_regex; ··· 82 82 struct list_head head; 83 83 } pairs; 84 84 struct he_stat stat; 85 + struct he_stat *stat_acc; 85 86 struct map_symbol ms; 86 87 struct thread *thread; 87 88 struct comm *comm; ··· 130 129 { 131 130 list_add_tail(&pair->pairs.node, &he->pairs.head); 132 131 } 132 + 133 + static inline float hist_entry__get_percent_limit(struct hist_entry *he) 134 + { 135 + u64 period = he->stat.period; 136 + u64 total_period = hists__total_period(he->hists); 137 + 138 + if (unlikely(total_period == 0)) 139 + return 0; 140 + 141 + if (symbol_conf.cumulate_callchain) 142 + period = he->stat_acc->period; 143 + 144 + return period * 100.0 / total_period; 145 + } 146 + 133 147 134 148 enum sort_mode { 135 149 SORT_MODE__NORMAL, ··· 202 186 int (*se_snprintf)(struct hist_entry *he, char *bf, size_t size, 203 187 unsigned int width); 204 188 u8 se_width_idx; 205 - bool elide; 206 189 }; 207 190 208 191 extern struct sort_entry sort_thread; ··· 212 197 void reset_output_field(void); 213 198 extern int sort_dimension__add(const char *); 214 199 void sort__setup_elide(FILE *fp); 200 + void perf_hpp__set_elide(int idx, bool elide); 215 201 216 202 int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); 217 203
+6 -5
tools/perf/util/symbol.c
··· 29 29 char **vmlinux_path; 30 30 31 31 struct symbol_conf symbol_conf = { 32 - .use_modules = true, 33 - .try_vmlinux_path = true, 34 - .annotate_src = true, 35 - .demangle = true, 36 - .symfs = "", 32 + .use_modules = true, 33 + .try_vmlinux_path = true, 34 + .annotate_src = true, 35 + .demangle = true, 36 + .cumulate_callchain = true, 37 + .symfs = "", 37 38 }; 38 39 39 40 static enum dso_binary_type binary_type_symtab[] = {
+1
tools/perf/util/symbol.h
··· 109 109 show_nr_samples, 110 110 show_total_period, 111 111 use_callchain, 112 + cumulate_callchain, 112 113 exclude_other, 113 114 show_cpu_utilization, 114 115 initialized,