Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

-1

arch/x86/include/asm/kdebug.h

··· 18 18 DIE_TRAP, 19 19 DIE_GPF, 20 20 DIE_CALL, 21 - DIE_NMI_IPI, 22 21 DIE_PAGE_FAULT, 23 22 DIE_NMIUNKNOWN, 24 23 };

+11 -1

arch/x86/include/asm/mach_traps.h

··· 7 7 8 8 #include <asm/mc146818rtc.h> 9 9 10 + #define NMI_REASON_PORT 0x61 11 + 12 + #define NMI_REASON_SERR 0x80 13 + #define NMI_REASON_IOCHK 0x40 14 + #define NMI_REASON_MASK (NMI_REASON_SERR | NMI_REASON_IOCHK) 15 + 16 + #define NMI_REASON_CLEAR_SERR 0x04 17 + #define NMI_REASON_CLEAR_IOCHK 0x08 18 + #define NMI_REASON_CLEAR_MASK 0x0f 19 + 10 20 static inline unsigned char get_nmi_reason(void) 11 21 { 12 - return inb(0x61); 22 + return inb(NMI_REASON_PORT); 13 23 } 14 24 15 25 static inline void reassert_nmi(void)

+20

arch/x86/include/asm/nmi.h

··· 23 23 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace 24 24 #endif 25 25 26 + /* 27 + * Define some priorities for the nmi notifier call chain. 28 + * 29 + * Create a local nmi bit that has a higher priority than 30 + * external nmis, because the local ones are more frequent. 31 + * 32 + * Also set up some default high/normal/low settings for 33 + * subsystems to register with. Using 4 bits to separate 34 + * the priorities. This can go a lot higher if need be. 35 + */ 36 + 37 + #define NMI_LOCAL_SHIFT 16 /* randomly picked */ 38 + #define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT) 39 + #define NMI_HIGH_PRIOR (1ULL << 8) 40 + #define NMI_NORMAL_PRIOR (1ULL << 4) 41 + #define NMI_LOW_PRIOR (1ULL << 0) 42 + #define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR) 43 + #define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR) 44 + #define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR) 45 + 26 46 void stop_nmi(void); 27 47 void restart_nmi(void); 28 48

+3

arch/x86/include/asm/perf_event_p4.h

··· 20 20 #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) 21 21 #define ARCH_P4_MAX_CCCR (18) 22 22 23 + #define ARCH_P4_CNTRVAL_BITS (40) 24 + #define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1) 25 + 23 26 #define P4_ESCR_EVENT_MASK 0x7e000000U 24 27 #define P4_ESCR_EVENT_SHIFT 25 25 28 #define P4_ESCR_EVENTMASK_MASK 0x01fffe00U

+1 -2

arch/x86/kernel/apic/hw_nmi.c

··· 68 68 69 69 switch (cmd) { 70 70 case DIE_NMI: 71 - case DIE_NMI_IPI: 72 71 break; 73 72 74 73 default: ··· 95 96 static __read_mostly struct notifier_block backtrace_notifier = { 96 97 .notifier_call = arch_trigger_all_cpu_backtrace_handler, 97 98 .next = NULL, 98 - .priority = 1 99 + .priority = NMI_LOCAL_LOW_PRIOR, 99 100 }; 100 101 101 102 static int __init register_trigger_all_cpu_backtrace(void)

+1 -1

arch/x86/kernel/apic/x2apic_uv_x.c

··· 641 641 */ 642 642 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) 643 643 { 644 - if (reason != DIE_NMI_IPI) 644 + if (reason != DIE_NMIUNKNOWN) 645 645 return NOTIFY_OK; 646 646 647 647 if (in_crash_kexec)

+3 -2

arch/x86/kernel/cpu/mcheck/mce-inject.c

··· 25 25 #include <linux/gfp.h> 26 26 #include <asm/mce.h> 27 27 #include <asm/apic.h> 28 + #include <asm/nmi.h> 28 29 29 30 /* Update fake mce registers on current CPU. */ 30 31 static void inject_mce(struct mce *m) ··· 84 83 struct die_args *args = (struct die_args *)data; 85 84 int cpu = smp_processor_id(); 86 85 struct mce *m = &__get_cpu_var(injectm); 87 - if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) 86 + if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) 88 87 return NOTIFY_DONE; 89 88 cpumask_clear_cpu(cpu, mce_inject_cpumask); 90 89 if (m->inject_flags & MCJ_EXCEPTION) ··· 96 95 97 96 static struct notifier_block mce_raise_nb = { 98 97 .notifier_call = mce_raise_notify, 99 - .priority = 1000, 98 + .priority = NMI_LOCAL_NORMAL_PRIOR, 100 99 }; 101 100 102 101 /* Inject mce on current CPU */

+1 -2

arch/x86/kernel/cpu/perf_event.c

··· 1267 1267 1268 1268 switch (cmd) { 1269 1269 case DIE_NMI: 1270 - case DIE_NMI_IPI: 1271 1270 break; 1272 1271 case DIE_NMIUNKNOWN: 1273 1272 this_nmi = percpu_read(irq_stat.__nmi_count); ··· 1316 1317 static __read_mostly struct notifier_block perf_event_nmi_notifier = { 1317 1318 .notifier_call = perf_event_nmi_handler, 1318 1319 .next = NULL, 1319 - .priority = 1 1320 + .priority = NMI_LOCAL_LOW_PRIOR, 1320 1321 }; 1321 1322 1322 1323 static struct event_constraint unconstrained;

+15 -13

arch/x86/kernel/cpu/perf_event_p4.c

··· 753 753 754 754 static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) 755 755 { 756 - int overflow = 0; 757 - u32 low, high; 756 + u64 v; 758 757 759 - rdmsr(hwc->config_base + hwc->idx, low, high); 760 - 761 - /* we need to check high bit for unflagged overflows */ 762 - if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) { 763 - overflow = 1; 764 - (void)checking_wrmsrl(hwc->config_base + hwc->idx, 765 - ((u64)low) & ~P4_CCCR_OVF); 758 + /* an official way for overflow indication */ 759 + rdmsrl(hwc->config_base + hwc->idx, v); 760 + if (v & P4_CCCR_OVF) { 761 + wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF); 762 + return 1; 766 763 } 767 764 768 - return overflow; 765 + /* it might be unflagged overflow */ 766 + rdmsrl(hwc->event_base + hwc->idx, v); 767 + if (!(v & ARCH_P4_CNTRVAL_MASK)) 768 + return 1; 769 + 770 + return 0; 769 771 } 770 772 771 773 static void p4_pmu_disable_pebs(void) ··· 1154 1152 */ 1155 1153 .num_counters = ARCH_P4_MAX_CCCR, 1156 1154 .apic = 1, 1157 - .cntval_bits = 40, 1158 - .cntval_mask = (1ULL << 40) - 1, 1159 - .max_period = (1ULL << 39) - 1, 1155 + .cntval_bits = ARCH_P4_CNTRVAL_BITS, 1156 + .cntval_mask = ARCH_P4_CNTRVAL_MASK, 1157 + .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1, 1160 1158 .hw_config = p4_hw_config, 1161 1159 .schedule_events = p4_pmu_schedule_events, 1162 1160 /*

-6

arch/x86/kernel/dumpstack.c

··· 197 197 */ 198 198 void dump_stack(void) 199 199 { 200 - unsigned long bp = 0; 201 200 unsigned long stack; 202 - 203 - #ifdef CONFIG_FRAME_POINTER 204 - if (!bp) 205 - get_bp(bp); 206 - #endif 207 201 208 202 printk("Pid: %d, comm: %.20s %s %s %.*s\n", 209 203 current->pid, current->comm, print_tainted(),

+22 -12

arch/x86/kernel/entry_64.S

··· 299 299 ENTRY(save_args) 300 300 XCPT_FRAME 301 301 cld 302 - movq_cfi rdi, RDI+16-ARGOFFSET 303 - movq_cfi rsi, RSI+16-ARGOFFSET 304 - movq_cfi rdx, RDX+16-ARGOFFSET 305 - movq_cfi rcx, RCX+16-ARGOFFSET 306 - movq_cfi rax, RAX+16-ARGOFFSET 307 - movq_cfi r8, R8+16-ARGOFFSET 308 - movq_cfi r9, R9+16-ARGOFFSET 309 - movq_cfi r10, R10+16-ARGOFFSET 310 - movq_cfi r11, R11+16-ARGOFFSET 302 + /* 303 + * start from rbp in pt_regs and jump over 304 + * return address. 305 + */ 306 + movq_cfi rdi, RDI+8-RBP 307 + movq_cfi rsi, RSI+8-RBP 308 + movq_cfi rdx, RDX+8-RBP 309 + movq_cfi rcx, RCX+8-RBP 310 + movq_cfi rax, RAX+8-RBP 311 + movq_cfi r8, R8+8-RBP 312 + movq_cfi r9, R9+8-RBP 313 + movq_cfi r10, R10+8-RBP 314 + movq_cfi r11, R11+8-RBP 311 315 312 - leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ 316 + leaq -RBP+8(%rsp),%rdi /* arg1 for handler */ 313 317 movq_cfi rbp, 8 /* push %rbp */ 314 318 leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ 315 319 testl $3, CS(%rdi) ··· 786 782 787 783 /* 0(%rsp): ~(interrupt number) */ 788 784 .macro interrupt func 789 - subq $ORIG_RAX-ARGOFFSET+8, %rsp 790 - CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8 785 + /* reserve pt_regs for scratch regs and rbp */ 786 + subq $ORIG_RAX-RBP, %rsp 787 + CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP 791 788 call save_args 792 789 PARTIAL_FRAME 0 793 790 call \func ··· 813 808 TRACE_IRQS_OFF 814 809 decl PER_CPU_VAR(irq_count) 815 810 leaveq 811 + 816 812 CFI_RESTORE rbp 817 813 CFI_DEF_CFA_REGISTER rsp 814 + CFI_ADJUST_CFA_OFFSET -8 815 + 816 + /* we did not save rbx, restore only from ARGOFFSET */ 817 + addq $8, %rsp 818 818 CFI_ADJUST_CFA_OFFSET -8 819 819 exit_intr: 820 820 GET_THREAD_INFO(%rcx)

+2 -5

arch/x86/kernel/kgdb.c

··· 48 48 #include <asm/apicdef.h> 49 49 #include <asm/system.h> 50 50 #include <asm/apic.h> 51 + #include <asm/nmi.h> 51 52 52 53 struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = 53 54 { ··· 526 525 } 527 526 return NOTIFY_DONE; 528 527 529 - case DIE_NMI_IPI: 530 - /* Just ignore, we will handle the roundup on DIE_NMI. */ 531 - return NOTIFY_DONE; 532 - 533 528 case DIE_NMIUNKNOWN: 534 529 if (was_in_debug_nmi[raw_smp_processor_id()]) { 535 530 was_in_debug_nmi[raw_smp_processor_id()] = 0; ··· 603 606 /* 604 607 * Lowest-prio notifier priority, we want to be notified last: 605 608 */ 606 - .priority = -INT_MAX, 609 + .priority = NMI_LOCAL_LOW_PRIOR, 607 610 }; 608 611 609 612 /**

+4 -1

arch/x86/kernel/reboot.c

··· 18 18 #include <asm/pci_x86.h> 19 19 #include <asm/virtext.h> 20 20 #include <asm/cpu.h> 21 + #include <asm/nmi.h> 21 22 22 23 #ifdef CONFIG_X86_32 23 24 # include <linux/ctype.h> ··· 748 747 { 749 748 int cpu; 750 749 751 - if (val != DIE_NMI_IPI) 750 + if (val != DIE_NMI) 752 751 return NOTIFY_OK; 753 752 754 753 cpu = raw_smp_processor_id(); ··· 779 778 780 779 static struct notifier_block crash_nmi_nb = { 781 780 .notifier_call = crash_nmi_callback, 781 + /* we want to be the first one called */ 782 + .priority = NMI_LOCAL_HIGH_PRIOR+1, 782 783 }; 783 784 784 785 /* Halt all other CPUs, calling the specified function on each of them

+51 -51

arch/x86/kernel/traps.c

··· 84 84 static int ignore_nmis; 85 85 86 86 int unknown_nmi_panic; 87 + /* 88 + * Prevent NMI reason port (0x61) being accessed simultaneously, can 89 + * only be used in NMI handler. 90 + */ 91 + static DEFINE_RAW_SPINLOCK(nmi_reason_lock); 87 92 88 93 static inline void conditional_sti(struct pt_regs *regs) 89 94 { ··· 315 310 __setup("unknown_nmi_panic", setup_unknown_nmi_panic); 316 311 317 312 static notrace __kprobes void 318 - mem_parity_error(unsigned char reason, struct pt_regs *regs) 313 + pci_serr_error(unsigned char reason, struct pt_regs *regs) 319 314 { 320 - printk(KERN_EMERG 321 - "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", 322 - reason, smp_processor_id()); 315 + pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", 316 + reason, smp_processor_id()); 323 317 324 - printk(KERN_EMERG 325 - "You have some hardware problem, likely on the PCI bus.\n"); 326 - 318 + /* 319 + * On some machines, PCI SERR line is used to report memory 320 + * errors. EDAC makes use of it. 321 + */ 327 322 #if defined(CONFIG_EDAC) 328 323 if (edac_handler_set()) { 329 324 edac_atomic_assert_error(); ··· 334 329 if (panic_on_unrecovered_nmi) 335 330 panic("NMI: Not continuing"); 336 331 337 - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); 332 + pr_emerg("Dazed and confused, but trying to continue\n"); 338 333 339 - /* Clear and disable the memory parity error line. */ 340 - reason = (reason & 0xf) | 4; 341 - outb(reason, 0x61); 334 + /* Clear and disable the PCI SERR error line. */ 335 + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; 336 + outb(reason, NMI_REASON_PORT); 342 337 } 343 338 344 339 static notrace __kprobes void ··· 346 341 { 347 342 unsigned long i; 348 343 349 - printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); 344 + pr_emerg( 345 + "NMI: IOCK error (debug interrupt?) 
for reason %02x on CPU %d.\n", 346 + reason, smp_processor_id()); 350 347 show_registers(regs); 351 348 352 349 if (panic_on_io_nmi) 353 350 panic("NMI IOCK error: Not continuing"); 354 351 355 352 /* Re-enable the IOCK line, wait for a few seconds */ 356 - reason = (reason & 0xf) | 8; 357 - outb(reason, 0x61); 353 + reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; 354 + outb(reason, NMI_REASON_PORT); 358 355 359 356 i = 20000; 360 357 while (--i) { ··· 364 357 udelay(100); 365 358 } 366 359 367 - reason &= ~8; 368 - outb(reason, 0x61); 360 + reason &= ~NMI_REASON_CLEAR_IOCHK; 361 + outb(reason, NMI_REASON_PORT); 369 362 } 370 363 371 364 static notrace __kprobes void ··· 384 377 return; 385 378 } 386 379 #endif 387 - printk(KERN_EMERG 388 - "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", 389 - reason, smp_processor_id()); 380 + pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", 381 + reason, smp_processor_id()); 390 382 391 - printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); 383 + pr_emerg("Do you have a strange power saving mode enabled?\n"); 392 384 if (unknown_nmi_panic || panic_on_unrecovered_nmi) 393 385 panic("NMI: Not continuing"); 394 386 395 - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); 387 + pr_emerg("Dazed and confused, but trying to continue\n"); 396 388 } 397 389 398 390 static notrace __kprobes void default_do_nmi(struct pt_regs *regs) 399 391 { 400 392 unsigned char reason = 0; 401 - int cpu; 402 393 403 - cpu = smp_processor_id(); 394 + /* 395 + * CPU-specific NMI must be processed before non-CPU-specific 396 + * NMI, otherwise we may lose it, because the CPU-specific 397 + * NMI can not be detected/processed on other CPUs. 398 + */ 399 + if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) 400 + return; 404 401 405 - /* Only the BSP gets external NMIs from the system. 
*/ 406 - if (!cpu) 407 - reason = get_nmi_reason(); 402 + /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ 403 + raw_spin_lock(&nmi_reason_lock); 404 + reason = get_nmi_reason(); 408 405 409 - if (!(reason & 0xc0)) { 410 - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) 411 - == NOTIFY_STOP) 412 - return; 413 - 414 - #ifdef CONFIG_X86_LOCAL_APIC 415 - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) 416 - == NOTIFY_STOP) 417 - return; 406 + if (reason & NMI_REASON_MASK) { 407 + if (reason & NMI_REASON_SERR) 408 + pci_serr_error(reason, regs); 409 + else if (reason & NMI_REASON_IOCHK) 410 + io_check_error(reason, regs); 411 + #ifdef CONFIG_X86_32 412 + /* 413 + * Reassert NMI in case it became active 414 + * meanwhile as it's edge-triggered: 415 + */ 416 + reassert_nmi(); 418 417 #endif 419 - unknown_nmi_error(reason, regs); 420 - 418 + raw_spin_unlock(&nmi_reason_lock); 421 419 return; 422 420 } 423 - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 424 - return; 421 + raw_spin_unlock(&nmi_reason_lock); 425 422 426 - /* AK: following checks seem to be broken on modern chipsets. FIXME */ 427 - if (reason & 0x80) 428 - mem_parity_error(reason, regs); 429 - if (reason & 0x40) 430 - io_check_error(reason, regs); 431 - #ifdef CONFIG_X86_32 432 - /* 433 - * Reassert NMI in case it became active meanwhile 434 - * as it's edge-triggered: 435 - */ 436 - reassert_nmi(); 437 - #endif 423 + unknown_nmi_error(reason, regs); 438 424 } 439 425 440 426 dotraplinkage notrace __kprobes void

+1 -2

arch/x86/oprofile/nmi_int.c

··· 65 65 66 66 switch (val) { 67 67 case DIE_NMI: 68 - case DIE_NMI_IPI: 69 68 if (ctr_running) 70 69 model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); 71 70 else if (!nmi_enabled) ··· 360 361 static struct notifier_block profile_exceptions_nb = { 361 362 .notifier_call = profile_exceptions_notify, 362 363 .next = NULL, 363 - .priority = 2 364 + .priority = NMI_LOCAL_LOW_PRIOR, 364 365 }; 365 366 366 367 static void nmi_cpu_restore_registers(struct op_msrs *msrs)

+1 -1

arch/x86/oprofile/nmi_timer_int.c

··· 38 38 static struct notifier_block profile_timer_exceptions_nb = { 39 39 .notifier_call = profile_timer_exceptions_notify, 40 40 .next = NULL, 41 - .priority = 0 41 + .priority = NMI_LOW_PRIOR, 42 42 }; 43 43 44 44 static int timer_start(void)

+1 -1

drivers/char/ipmi/ipmi_watchdog.c

··· 1081 1081 { 1082 1082 struct die_args *args = data; 1083 1083 1084 - if (val != DIE_NMI) 1084 + if (val != DIE_NMIUNKNOWN) 1085 1085 return NOTIFY_OK; 1086 1086 1087 1087 /* Hack, if it's a memory or I/O error, ignore it. */

+1 -1

drivers/watchdog/hpwdt.c

··· 469 469 unsigned long rom_pl; 470 470 static int die_nmi_called; 471 471 472 - if (ulReason != DIE_NMI && ulReason != DIE_NMI_IPI) 472 + if (ulReason != DIE_NMIUNKNOWN) 473 473 goto out; 474 474 475 475 if (!hpwdt_nmi_decoding)

+4 -14

include/linux/dynamic_debug.h

··· 44 44 extern int ddebug_remove_module(const char *mod_name); 45 45 46 46 #define dynamic_pr_debug(fmt, ...) do { \ 47 - __label__ do_printk; \ 48 - __label__ out; \ 49 47 static struct _ddebug descriptor \ 50 48 __used \ 51 49 __attribute__((section("__verbose"), aligned(8))) = \ 52 50 { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ 53 51 _DPRINTK_FLAGS_DEFAULT }; \ 54 - JUMP_LABEL(&descriptor.enabled, do_printk); \ 55 - goto out; \ 56 - do_printk: \ 57 - printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ 58 - out: ; \ 52 + if (unlikely(descriptor.enabled)) \ 53 + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__); \ 59 54 } while (0) 60 55 61 56 62 57 #define dynamic_dev_dbg(dev, fmt, ...) do { \ 63 - __label__ do_printk; \ 64 - __label__ out; \ 65 58 static struct _ddebug descriptor \ 66 59 __used \ 67 60 __attribute__((section("__verbose"), aligned(8))) = \ 68 61 { KBUILD_MODNAME, __func__, __FILE__, fmt, __LINE__, \ 69 62 _DPRINTK_FLAGS_DEFAULT }; \ 70 - JUMP_LABEL(&descriptor.enabled, do_printk); \ 71 - goto out; \ 72 - do_printk: \ 73 - dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ 74 - out: ; \ 63 + if (unlikely(descriptor.enabled)) \ 64 + dev_printk(KERN_DEBUG, dev, fmt, ##__VA_ARGS__); \ 75 65 } while (0) 76 66 77 67 #else

+2 -2

include/linux/tracepoint.h

··· 32 32 int state; /* State. */ 33 33 void (*regfunc)(void); 34 34 void (*unregfunc)(void); 35 - struct tracepoint_func *funcs; 35 + struct tracepoint_func __rcu *funcs; 36 36 } __attribute__((aligned(32))); /* 37 37 * Aligned on 32 bytes because it is 38 38 * globally visible and gcc happily ··· 326 326 * memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); 327 327 * __entry->next_pid = next->pid; 328 328 * __entry->next_prio = next->prio; 329 - * ) 329 + * ), 330 330 * 331 331 * * 332 332 * * Formatted output of a trace record via TP_printk().

+10

include/trace/define_trace.h

··· 21 21 #undef CREATE_TRACE_POINTS 22 22 23 23 #include <linux/stringify.h> 24 + /* 25 + * module.h includes tracepoints, and because ftrace.h 26 + * pulls in module.h: 27 + * trace/ftrace.h -> linux/ftrace_event.h -> linux/perf_event.h -> 28 + * linux/ftrace.h -> linux/module.h 29 + * we must include module.h here before we play with any of 30 + * the TRACE_EVENT() macros, otherwise the tracepoints included 31 + * by module.h may break the build. 32 + */ 33 + #include <linux/module.h> 24 34 25 35 #undef TRACE_EVENT 26 36 #define TRACE_EVENT(name, proto, args, tstruct, assign, print) \

+1 -3

include/trace/events/skb.h

··· 25 25 26 26 TP_fast_assign( 27 27 __entry->skbaddr = skb; 28 - if (skb) { 29 - __entry->protocol = ntohs(skb->protocol); 30 - } 28 + __entry->protocol = ntohs(skb->protocol); 31 29 __entry->location = location; 32 30 ), 33 31

+1

kernel/Makefile

··· 100 100 obj-$(CONFIG_TRACING) += trace/ 101 101 obj-$(CONFIG_X86_DS) += trace/ 102 102 obj-$(CONFIG_RING_BUFFER) += trace/ 103 + obj-$(CONFIG_TRACEPOINTS) += trace/ 103 104 obj-$(CONFIG_SMP) += sched_cpupri.o 104 105 obj-$(CONFIG_IRQ_WORK) += irq_work.o 105 106 obj-$(CONFIG_PERF_EVENTS) += perf_event.o

+9 -5

kernel/exit.c

··· 994 994 exit_fs(tsk); 995 995 check_stack_usage(); 996 996 exit_thread(); 997 + 998 + /* 999 + * Flush inherited counters to the parent - before the parent 1000 + * gets woken up by child-exit notifications. 1001 + * 1002 + * because of cgroup mode, must be called before cgroup_exit() 1003 + */ 1004 + perf_event_exit_task(tsk); 1005 + 997 1006 cgroup_exit(tsk, 1); 998 1007 999 1008 if (group_dead) ··· 1016 1007 * FIXME: do that only when needed, using sched_exit tracepoint 1017 1008 */ 1018 1009 flush_ptrace_hw_breakpoint(tsk); 1019 - /* 1020 - * Flush inherited counters to the parent - before the parent 1021 - * gets woken up by child-exit notifications. 1022 - */ 1023 - perf_event_exit_task(tsk); 1024 1010 1025 1011 exit_notify(tsk, group_dead); 1026 1012 #ifdef CONFIG_NUMA

+49 -33

kernel/perf_event.c

··· 38 38 39 39 #include <asm/irq_regs.h> 40 40 41 + enum event_type_t { 42 + EVENT_FLEXIBLE = 0x1, 43 + EVENT_PINNED = 0x2, 44 + EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, 45 + }; 46 + 41 47 atomic_t perf_task_events __read_mostly; 42 48 static atomic_t nr_mmap_events __read_mostly; 43 49 static atomic_t nr_comm_events __read_mostly; ··· 71 65 72 66 static atomic64_t perf_event_id; 73 67 68 + static void cpu_ctx_sched_out(struct perf_cpu_context *cpuctx, 69 + enum event_type_t event_type); 70 + 71 + static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, 72 + enum event_type_t event_type); 73 + 74 74 void __weak perf_event_print_debug(void) { } 75 75 76 76 extern __weak const char *perf_pmu_name(void) 77 77 { 78 78 return "pmu"; 79 + } 80 + 81 + static inline u64 perf_clock(void) 82 + { 83 + return local_clock(); 79 84 } 80 85 81 86 void perf_pmu_disable(struct pmu *pmu) ··· 257 240 put_ctx(ctx); 258 241 } 259 242 260 - static inline u64 perf_clock(void) 261 - { 262 - return local_clock(); 263 - } 264 - 265 243 /* 266 244 * Update the record of the current time in a context. 267 245 */ ··· 266 254 267 255 ctx->time += now - ctx->timestamp; 268 256 ctx->timestamp = now; 257 + } 258 + 259 + static u64 perf_event_time(struct perf_event *event) 260 + { 261 + struct perf_event_context *ctx = event->ctx; 262 + return ctx ? 
ctx->time : 0; 269 263 } 270 264 271 265 /* ··· 287 269 return; 288 270 289 271 if (ctx->is_active) 290 - run_end = ctx->time; 272 + run_end = perf_event_time(event); 291 273 else 292 274 run_end = event->tstamp_stopped; 293 275 ··· 296 278 if (event->state == PERF_EVENT_STATE_INACTIVE) 297 279 run_end = event->tstamp_stopped; 298 280 else 299 - run_end = ctx->time; 281 + run_end = perf_event_time(event); 300 282 301 283 event->total_time_running = run_end - event->tstamp_running; 302 284 } ··· 552 534 struct perf_cpu_context *cpuctx, 553 535 struct perf_event_context *ctx) 554 536 { 537 + u64 tstamp = perf_event_time(event); 555 538 u64 delta; 556 539 /* 557 540 * An event which could not be activated because of ··· 564 545 && !event_filter_match(event)) { 565 546 delta = ctx->time - event->tstamp_stopped; 566 547 event->tstamp_running += delta; 567 - event->tstamp_stopped = ctx->time; 548 + event->tstamp_stopped = tstamp; 568 549 } 569 550 570 551 if (event->state != PERF_EVENT_STATE_ACTIVE) ··· 575 556 event->pending_disable = 0; 576 557 event->state = PERF_EVENT_STATE_OFF; 577 558 } 578 - event->tstamp_stopped = ctx->time; 559 + event->tstamp_stopped = tstamp; 579 560 event->pmu->del(event, 0); 580 561 event->oncpu = -1; 581 562 ··· 787 768 struct perf_cpu_context *cpuctx, 788 769 struct perf_event_context *ctx) 789 770 { 771 + u64 tstamp = perf_event_time(event); 772 + 790 773 if (event->state <= PERF_EVENT_STATE_OFF) 791 774 return 0; 792 775 ··· 805 784 return -EAGAIN; 806 785 } 807 786 808 - event->tstamp_running += ctx->time - event->tstamp_stopped; 787 + event->tstamp_running += tstamp - event->tstamp_stopped; 809 788 810 - event->shadow_ctx_time = ctx->time - ctx->timestamp; 789 + event->shadow_ctx_time = tstamp - ctx->timestamp; 811 790 812 791 if (!is_software_event(event)) 813 792 cpuctx->active_oncpu++; ··· 919 898 static void add_event_to_ctx(struct perf_event *event, 920 899 struct perf_event_context *ctx) 921 900 { 901 + u64 tstamp = 
perf_event_time(event); 902 + 922 903 list_add_event(event, ctx); 923 904 perf_group_attach(event); 924 - event->tstamp_enabled = ctx->time; 925 - event->tstamp_running = ctx->time; 926 - event->tstamp_stopped = ctx->time; 905 + event->tstamp_enabled = tstamp; 906 + event->tstamp_running = tstamp; 907 + event->tstamp_stopped = tstamp; 927 908 } 928 909 929 910 /* ··· 960 937 961 938 add_event_to_ctx(event, ctx); 962 939 963 - if (event->cpu != -1 && event->cpu != smp_processor_id()) 940 + if (!event_filter_match(event)) 964 941 goto unlock; 965 942 966 943 /* ··· 1065 1042 struct perf_event_context *ctx) 1066 1043 { 1067 1044 struct perf_event *sub; 1045 + u64 tstamp = perf_event_time(event); 1068 1046 1069 1047 event->state = PERF_EVENT_STATE_INACTIVE; 1070 - event->tstamp_enabled = ctx->time - event->total_time_enabled; 1048 + event->tstamp_enabled = tstamp - event->total_time_enabled; 1071 1049 list_for_each_entry(sub, &event->sibling_list, group_entry) { 1072 - if (sub->state >= PERF_EVENT_STATE_INACTIVE) { 1073 - sub->tstamp_enabled = 1074 - ctx->time - sub->total_time_enabled; 1075 - } 1050 + if (sub->state >= PERF_EVENT_STATE_INACTIVE) 1051 + sub->tstamp_enabled = tstamp - sub->total_time_enabled; 1076 1052 } 1077 1053 } 1078 1054 ··· 1104 1082 goto unlock; 1105 1083 __perf_event_mark_enabled(event, ctx); 1106 1084 1107 - if (event->cpu != -1 && event->cpu != smp_processor_id()) 1085 + if (!event_filter_match(event)) 1108 1086 goto unlock; 1109 1087 1110 1088 /* ··· 1214 1192 1215 1193 return 0; 1216 1194 } 1217 - 1218 - enum event_type_t { 1219 - EVENT_FLEXIBLE = 0x1, 1220 - EVENT_PINNED = 0x2, 1221 - EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, 1222 - }; 1223 1195 1224 1196 static void ctx_sched_out(struct perf_event_context *ctx, 1225 1197 struct perf_cpu_context *cpuctx, ··· 1451 1435 list_for_each_entry(event, &ctx->pinned_groups, group_entry) { 1452 1436 if (event->state <= PERF_EVENT_STATE_OFF) 1453 1437 continue; 1454 - if (event->cpu != -1 && 
event->cpu != smp_processor_id()) 1438 + if (!event_filter_match(event)) 1455 1439 continue; 1456 1440 1457 1441 if (group_can_go_on(event, cpuctx, 1)) ··· 1483 1467 * Listen to the 'cpu' scheduling filter constraint 1484 1468 * of events: 1485 1469 */ 1486 - if (event->cpu != -1 && event->cpu != smp_processor_id()) 1470 + if (!event_filter_match(event)) 1487 1471 continue; 1488 1472 1489 1473 if (group_can_go_on(event, cpuctx, can_add_hw)) { ··· 1710 1694 if (event->state != PERF_EVENT_STATE_ACTIVE) 1711 1695 continue; 1712 1696 1713 - if (event->cpu != -1 && event->cpu != smp_processor_id()) 1697 + if (!event_filter_match(event)) 1714 1698 continue; 1715 1699 1716 1700 hwc = &event->hw; ··· 3909 3893 if (event->state < PERF_EVENT_STATE_INACTIVE) 3910 3894 return 0; 3911 3895 3912 - if (event->cpu != -1 && event->cpu != smp_processor_id()) 3896 + if (!event_filter_match(event)) 3913 3897 return 0; 3914 3898 3915 3899 if (event->attr.comm || event->attr.mmap || ··· 4046 4030 if (event->state < PERF_EVENT_STATE_INACTIVE) 4047 4031 return 0; 4048 4032 4049 - if (event->cpu != -1 && event->cpu != smp_processor_id()) 4033 + if (!event_filter_match(event)) 4050 4034 return 0; 4051 4035 4052 4036 if (event->attr.comm) ··· 4194 4178 if (event->state < PERF_EVENT_STATE_INACTIVE) 4195 4179 return 0; 4196 4180 4197 - if (event->cpu != -1 && event->cpu != smp_processor_id()) 4181 + if (!event_filter_match(event)) 4198 4182 return 0; 4199 4183 4200 4184 if ((!executable && event->attr.mmap_data) ||

+1 -1

kernel/trace/Makefile

··· 52 52 endif 53 53 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o 54 54 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o 55 - obj-$(CONFIG_EVENT_TRACING) += power-traces.o 55 + obj-$(CONFIG_TRACEPOINTS) += power-traces.o 56 56 ifeq ($(CONFIG_TRACING),y) 57 57 obj-$(CONFIG_KGDB_KDB) += trace_kdb.o 58 58 endif

+2 -4

kernel/trace/trace.c

··· 1313 1313 1314 1314 __this_cpu_inc(user_stack_count); 1315 1315 1316 - 1317 - 1318 1316 event = trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, 1319 1317 sizeof(*entry), flags, pc); 1320 1318 if (!event) 1321 - return; 1319 + goto out_drop_count; 1322 1320 entry = ring_buffer_event_data(event); 1323 1321 1324 1322 entry->tgid = current->tgid; ··· 1331 1333 if (!filter_check_discard(call, entry, buffer, event)) 1332 1334 ring_buffer_unlock_commit(buffer, event); 1333 1335 1336 + out_drop_count: 1334 1337 __this_cpu_dec(user_stack_count); 1335 - 1336 1338 out: 1337 1339 preempt_enable(); 1338 1340 }

+4 -5

lib/dynamic_debug.c

··· 141 141 else if (!dp->flags) 142 142 dt->num_enabled++; 143 143 dp->flags = newflags; 144 - if (newflags) { 145 - jump_label_enable(&dp->enabled); 146 - } else { 147 - jump_label_disable(&dp->enabled); 148 - } 144 + if (newflags) 145 + dp->enabled = 1; 146 + else 147 + dp->enabled = 0; 149 148 if (verbose) 150 149 printk(KERN_INFO 151 150 "ddebug: changed %s:%d [%s]%s %s\n",

+1 -1

tools/perf/Makefile

··· 227 227 CFLAGS_OPTIMIZE = -O6 228 228 endif 229 229 230 - CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) 230 + CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) 231 231 EXTLIBS = -lpthread -lrt -lelf -lm 232 232 ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 233 233 ALL_LDFLAGS = $(LDFLAGS)

+3

tools/perf/builtin-record.c

··· 331 331 else if (err == ENODEV && cpu_list) { 332 332 die("No such device - did you specify" 333 333 " an out-of-range profile CPU?\n"); 334 + } else if (err == ENOENT) { 335 + die("%s event is not supported. ", 336 + event_name(evsel)); 334 337 } else if (err == EINVAL && sample_id_all_avail) { 335 338 /* 336 339 * Old kernel, no attr->sample_id_type_all field

+3 -2

tools/perf/builtin-sched.c

··· 489 489 490 490 err = pthread_attr_init(&attr); 491 491 BUG_ON(err); 492 - err = pthread_attr_setstacksize(&attr, (size_t)(16*1024)); 492 + err = pthread_attr_setstacksize(&attr, 493 + (size_t) max(16 * 1024, PTHREAD_STACK_MIN)); 493 494 BUG_ON(err); 494 495 err = pthread_mutex_lock(&start_work_mutex); 495 496 BUG_ON(err); ··· 1862 1861 rec_argc = ARRAY_SIZE(record_args) + argc - 1; 1863 1862 rec_argv = calloc(rec_argc + 1, sizeof(char *)); 1864 1863 1865 - if (rec_argv) 1864 + if (rec_argv == NULL) 1866 1865 return -ENOMEM; 1867 1866 1868 1867 for (i = 0; i < ARRAY_SIZE(record_args); i++)

+3 -2

tools/perf/builtin-stat.c

··· 316 316 "\t Consider tweaking" 317 317 " /proc/sys/kernel/perf_event_paranoid or running as root.", 318 318 system_wide ? "system-wide " : ""); 319 + } else if (errno == ENOENT) { 320 + error("%s event is not supported. ", event_name(counter)); 319 321 } else { 320 322 error("open_counter returned with %d (%s). " 321 323 "/bin/dmesg may provide additional information.\n", ··· 685 683 nr_counters = ARRAY_SIZE(default_attrs); 686 684 687 685 for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { 688 - pos = perf_evsel__new(default_attrs[c].type, 689 - default_attrs[c].config, 686 + pos = perf_evsel__new(&default_attrs[c], 690 687 nr_counters); 691 688 if (pos == NULL) 692 689 goto out;

+115 -1

tools/perf/builtin-test.c

··· 234 234 return err; 235 235 } 236 236 237 + #include "util/cpumap.h" 237 238 #include "util/evsel.h" 238 239 #include <sys/types.h> 239 240 ··· 265 264 int err = -1, fd; 266 265 struct thread_map *threads; 267 266 struct perf_evsel *evsel; 267 + struct perf_event_attr attr; 268 268 unsigned int nr_open_calls = 111, i; 269 269 int id = trace_event__id("sys_enter_open"); 270 270 ··· 280 278 return -1; 281 279 } 282 280 283 - evsel = perf_evsel__new(PERF_TYPE_TRACEPOINT, id, 0); 281 + memset(&attr, 0, sizeof(attr)); 282 + attr.type = PERF_TYPE_TRACEPOINT; 283 + attr.config = id; 284 + evsel = perf_evsel__new(&attr, 0); 284 285 if (evsel == NULL) { 285 286 pr_debug("perf_evsel__new\n"); 286 287 goto out_thread_map_delete; ··· 322 317 return err; 323 318 } 324 319 320 + #include <sched.h> 321 + /* Bind this process to each cpu in turn, issue nr_open_calls + cpu open() calls there, then compare each per-cpu sys_enter_open count. Returns 0 on success, -1 otherwise. */ 322 + static int test__open_syscall_event_on_all_cpus(void) 323 + { 324 + int err = -1, fd, cpu; 325 + struct thread_map *threads; 326 + struct cpu_map *cpus; 327 + struct perf_evsel *evsel; 328 + struct perf_event_attr attr; 329 + unsigned int nr_open_calls = 111, i; 330 + cpu_set_t *cpu_set; 331 + size_t cpu_set_size; 332 + int id = trace_event__id("sys_enter_open"); 333 + 334 + if (id < 0) { 335 + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); 336 + return -1; 337 + } 338 + 339 + threads = thread_map__new(-1, getpid()); 340 + if (threads == NULL) { 341 + pr_debug("thread_map__new\n"); 342 + return -1; 343 + } 344 + 345 + cpus = cpu_map__new(NULL); 346 + if (cpus == NULL) { 347 + pr_debug("cpu_map__new\n"); 348 + goto out_thread_map_delete; 349 + } 350 + 351 + cpu_set = CPU_ALLOC(cpus->nr); 352 + 353 + if (cpu_set == NULL) 354 + goto out_thread_map_delete; 355 + 356 + cpu_set_size = CPU_ALLOC_SIZE(cpus->nr); 357 + CPU_ZERO_S(cpu_set_size, cpu_set); 358 + 359 + memset(&attr, 0, sizeof(attr)); 360 + attr.type = PERF_TYPE_TRACEPOINT; 361 + attr.config = id; 362 + evsel = perf_evsel__new(&attr, 0); 363 + if (evsel == NULL) { 364 + pr_debug("perf_evsel__new\n"); 365 + goto
out_cpu_free; 366 + } 367 + 368 + if (perf_evsel__open(evsel, cpus, threads) < 0) { 369 + pr_debug("failed to open counter: %s, " 370 + "tweak /proc/sys/kernel/perf_event_paranoid?\n", 371 + strerror(errno)); 372 + goto out_evsel_delete; 373 + } 374 + /* Issue nr_open_calls + cpu opens while bound to each cpu, so every cpu has a distinct expected count. */ 375 + for (cpu = 0; cpu < cpus->nr; ++cpu) { 376 + unsigned int ncalls = nr_open_calls + cpu; 377 + 378 + CPU_SET_S(cpu, cpu_set_size, cpu_set); 379 + sched_setaffinity(0, cpu_set_size, cpu_set); 380 + for (i = 0; i < ncalls; ++i) { 381 + fd = open("/etc/passwd", O_RDONLY); 382 + close(fd); 383 + } 384 + CPU_CLR_S(cpu, cpu_set_size, cpu_set); 385 + } 386 + 387 + /* 388 + * Here we need to explicitly preallocate the counts, as if 389 + * we use the auto allocation it will allocate just for 1 cpu, 390 + * as we start by cpu 0. 391 + */ 392 + if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) { 393 + pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr); 394 + goto out_close_fd; 395 + } 396 + 397 + for (cpu = 0; cpu < cpus->nr; ++cpu) { 398 + unsigned int expected; 399 + 400 + if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) { 401 + pr_debug("perf_evsel__open_read_on_cpu\n"); 402 + goto out_close_fd; 403 + } 404 + 405 + expected = nr_open_calls + cpu; 406 + if (evsel->counts->cpu[cpu].val != expected) { 407 + pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %Ld\n", 408 + expected, cpu, evsel->counts->cpu[cpu].val); 409 + goto out_close_fd; 410 + } 411 + } 412 + 413 + err = 0; 414 + out_close_fd: 415 + perf_evsel__close_fd(evsel, cpus->nr, threads->nr); 416 + out_evsel_delete: 417 + perf_evsel__delete(evsel); 418 + out_cpu_free: 419 + CPU_FREE(cpu_set); 420 + out_thread_map_delete: 421 + thread_map__delete(threads); 422 + return err; 423 + } 424 + 325 425 static struct test { 326 426 const char *desc; 327 427 int (*func)(void); ··· 438 328 { 439 329 .desc = "detect open syscall event", 440 330 .func = test__open_syscall_event, 331 + }, 332 + { 333 + .desc = "detect open syscall event on all cpus", 334 + .func = 
test__open_syscall_event_on_all_cpus, 441 335 }, 442 336 { 443 337 .func = NULL,

+2

tools/perf/builtin-top.c

··· 1247 1247 die("Permission error - are you root?\n" 1248 1248 "\t Consider tweaking" 1249 1249 " /proc/sys/kernel/perf_event_paranoid.\n"); 1250 + if (err == ENOENT) 1251 + die("%s event is not supported. ", event_name(evsel)); 1250 1252 /* 1251 1253 * If it's cycles then fall back to hrtimer 1252 1254 * based cpu-clock-tick sw counter, which

+58 -43

tools/perf/util/evsel.c

··· 6 6 7 7 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) 8 8 9 - struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx) 9 + struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) 10 10 { 11 11 struct perf_evsel *evsel = zalloc(sizeof(*evsel)); 12 12 13 13 if (evsel != NULL) { 14 14 evsel->idx = idx; 15 - evsel->attr.type = type; 16 - evsel->attr.config = config; 15 + evsel->attr = *attr; 17 16 INIT_LIST_HEAD(&evsel->node); 18 17 } 19 18 ··· 127 128 return 0; 128 129 } 129 130 130 - int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus) 131 + static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, 132 + struct thread_map *threads) 131 133 { 132 - int cpu; 134 + int cpu, thread; 133 135 134 - if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, 1) < 0) 136 + if (evsel->fd == NULL && 137 + perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) 135 138 return -1; 136 139 137 140 for (cpu = 0; cpu < cpus->nr; cpu++) { 138 - FD(evsel, cpu, 0) = sys_perf_event_open(&evsel->attr, -1, 139 - cpus->map[cpu], -1, 0); 140 - if (FD(evsel, cpu, 0) < 0) 141 - goto out_close; 141 + for (thread = 0; thread < threads->nr; thread++) { 142 + FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr, 143 + threads->map[thread], 144 + cpus->map[cpu], -1, 0); 145 + if (FD(evsel, cpu, thread) < 0) 146 + goto out_close; 147 + } 142 148 } 143 149 144 150 return 0; 145 151 146 152 out_close: 147 - while (--cpu >= 0) { 148 - close(FD(evsel, cpu, 0)); 149 - FD(evsel, cpu, 0) = -1; 150 - } 153 + do { 154 + while (--thread >= 0) { 155 + close(FD(evsel, cpu, thread)); 156 + FD(evsel, cpu, thread) = -1; 157 + } 158 + thread = threads->nr; 159 + } while (--cpu >= 0); 151 160 return -1; 161 + } 162 + 163 + static struct { 164 + struct cpu_map map; 165 + int cpus[1]; 166 + } empty_cpu_map = { 167 + .map.nr = 1, 168 + .cpus = { -1, }, 169 + }; 170 + 171 + static struct { 172 + struct thread_map map; 173 + 
int threads[1]; 174 + } empty_thread_map = { 175 + .map.nr = 1, 176 + .threads = { -1, }, 177 + }; 178 + 179 + int perf_evsel__open(struct perf_evsel *evsel, 180 + struct cpu_map *cpus, struct thread_map *threads) 181 + { 182 + 183 + if (cpus == NULL) { 184 + /* Work around old compiler warnings about strict aliasing */ 185 + cpus = &empty_cpu_map.map; 186 + } 187 + 188 + if (threads == NULL) 189 + threads = &empty_thread_map.map; 190 + 191 + return __perf_evsel__open(evsel, cpus, threads); 192 + } 193 + 194 + int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus) 195 + { 196 + return __perf_evsel__open(evsel, cpus, &empty_thread_map.map); 152 197 } 153 198 154 199 int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads) 155 200 { 156 - int thread; 157 - 158 - if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, 1, threads->nr)) 159 - return -1; 160 - 161 - for (thread = 0; thread < threads->nr; thread++) { 162 - FD(evsel, 0, thread) = sys_perf_event_open(&evsel->attr, 163 - threads->map[thread], -1, -1, 0); 164 - if (FD(evsel, 0, thread) < 0) 165 - goto out_close; 166 - } 167 - 168 - return 0; 169 - 170 - out_close: 171 - while (--thread >= 0) { 172 - close(FD(evsel, 0, thread)); 173 - FD(evsel, 0, thread) = -1; 174 - } 175 - return -1; 176 - } 177 - 178 - int perf_evsel__open(struct perf_evsel *evsel, 179 - struct cpu_map *cpus, struct thread_map *threads) 180 - { 181 - if (threads == NULL) 182 - return perf_evsel__open_per_cpu(evsel, cpus); 183 - 184 - return perf_evsel__open_per_thread(evsel, threads); 201 + return __perf_evsel__open(evsel, &empty_cpu_map.map, threads); 185 202 }

+1 -1

tools/perf/util/evsel.h

··· 37 37 struct cpu_map; 38 38 struct thread_map; 39 39 40 - struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx); 40 + struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx); 41 41 void perf_evsel__delete(struct perf_evsel *evsel); 42 42 43 43 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);

+40 -34

tools/perf/util/parse-events.c

··· 490 490 return EVT_HANDLED_ALL; 491 491 } 492 492 493 + static int store_event_type(const char *orgname) 494 + { 495 + char filename[PATH_MAX], *c; 496 + FILE *file; 497 + int id, n; 498 + /* Read an id from <debugfs_path>/<orgname>/id and pass it, with orgname, to perf_header__push_event(). */ 499 + n = snprintf(filename, sizeof(filename), "%s/%s/id", debugfs_path, orgname); 500 + if (n < 0 || (size_t)n >= sizeof(filename)) 501 + return 0; /* path does not fit in PATH_MAX: treated like an id file that cannot be opened */ 502 + /* The first ':' in the path becomes a directory separator. */ 503 + c = strchr(filename, ':'); 504 + if (c) 505 + *c = '/'; 506 + /* No readable id file: nothing is stored and 0 is returned. */ 507 + file = fopen(filename, "r"); 508 + if (!file) 509 + return 0; 510 + n = fscanf(file, "%i", &id); 511 + fclose(file); 512 + if (n < 1) { 513 + pr_err("cannot store event ID\n"); 514 + return -EINVAL; 515 + } 516 + return perf_header__push_event(id, orgname); 517 + } 493 518 494 519 static enum event_result parse_tracepoint_event(const char **strp, 495 520 struct perf_event_attr *attr) ··· 558 533 *strp += strlen(sys_name) + evt_length; 559 534 return parse_multiple_tracepoint_event(sys_name, evt_name, 560 535 flags); 561 - } else 536 + } else { 537 + if (store_event_type(evt_name) < 0) 538 + return EVT_FAILED; 539 + 562 540 return parse_single_tracepoint_event(sys_name, evt_name, 563 541 evt_length, attr, strp); 542 + } 564 543 } 565 544 566 545 static enum event_result ··· 807 778 return ret; 808 779 } 809 780 810 - static int store_event_type(const char *orgname) 811 - { 812 - char filename[PATH_MAX], *c; 813 - FILE *file; 814 - int id, n; 815 - 816 - sprintf(filename, "%s/", debugfs_path); 817 - strncat(filename, orgname, strlen(orgname)); 818 - strcat(filename, "/id"); 819 - 820 - c = strchr(filename, ':'); 821 - if (c) 822 - *c = '/'; 823 - 824 - file = fopen(filename, "r"); 825 - if (!file) 826 - return 0; 827 - n = fscanf(file, "%i", &id); 828 - fclose(file); 829 - if (n < 1) { 830 - pr_err("cannot store event ID\n"); 831 - return -EINVAL; 832 - } 833 - return perf_header__push_event(id, orgname); 834 - } 835 - 836 781 int parse_events(const struct option *opt __used, const char *str, int unset __used) 837 782 { 838 783 struct perf_event_attr attr; 839 784 enum 
event_result ret; 840 - 841 - if (strchr(str, ':')) 842 - if (store_event_type(str) < 0) 843 - return -1; 844 785 845 786 for (;;) { 846 787 memset(&attr, 0, sizeof(attr)); ··· 823 824 824 825 if (ret != EVT_HANDLED_ALL) { 825 826 struct perf_evsel *evsel; 826 - evsel = perf_evsel__new(attr.type, attr.config, 827 + evsel = perf_evsel__new(&attr, 827 828 nr_counters); 828 829 if (evsel == NULL) 829 830 return -1; ··· 1013 1014 1014 1015 int perf_evsel_list__create_default(void) 1015 1016 { 1016 - struct perf_evsel *evsel = perf_evsel__new(PERF_TYPE_HARDWARE, 1017 - PERF_COUNT_HW_CPU_CYCLES, 0); 1017 + struct perf_evsel *evsel; 1018 + struct perf_event_attr attr; 1019 + 1020 + memset(&attr, 0, sizeof(attr)); 1021 + attr.type = PERF_TYPE_HARDWARE; 1022 + attr.config = PERF_COUNT_HW_CPU_CYCLES; 1023 + 1024 + evsel = perf_evsel__new(&attr, 0); 1025 + 1018 1026 if (evsel == NULL) 1019 1027 return -ENOMEM; 1020 1028

+1 -1

tools/perf/util/session.c

··· 1007 1007 if (size == 0) 1008 1008 size = 8; 1009 1009 1010 - if (head + event->header.size >= mmap_size) { 1010 + if (head + event->header.size > mmap_size) { 1011 1011 if (mmaps[map_idx]) { 1012 1012 munmap(mmaps[map_idx], mmap_size); 1013 1013 mmaps[map_idx] = NULL;