Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
"Misc fixes:
- various tooling fixes
- kretprobe fixes
- kprobes annotation fixes
- kprobes error checking fix
- fix the default events for AMD Family 17h CPUs
- PEBS fix
- AUX record fix
- address filtering fix"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/kprobes: Avoid kretprobe recursion bug
kprobes: Mark ftrace mcount handler functions nokprobe
x86/kprobes: Verify stack frame on kretprobe
perf/x86/amd: Add event map for AMD Family 17h
perf bpf: Return NULL when RB tree lookup fails in perf_env__find_btf()
perf tools: Fix map reference counting
perf evlist: Fix side band thread draining
perf tools: Check maps for bpf programs
perf bpf: Return NULL when RB tree lookup fails in perf_env__find_bpf_prog_info()
tools include uapi: Sync sound/asound.h copy
perf top: Always sample time to satisfy needs of use of ordered queuing
perf evsel: Use hweight64() instead of hweight_long(attr.sample_regs_user)
tools lib traceevent: Fix missing equality check for strcmp
perf stat: Disable DIR_FORMAT feature for 'perf stat record'
perf scripts python: export-to-sqlite.py: Fix use of parent_id in calls_view
perf header: Fix lock/unlock imbalances when processing BPF/BTF info
perf/x86: Fix incorrect PEBS_REGS
perf/ring_buffer: Fix AUX record suppression
perf/core: Fix the address filtering fix
kprobes: Fix error check when reusing optimized probes

+195 -98
+26 -9
arch/x86/events/amd/core.c
··· 117 117 }; 118 118 119 119 /* 120 - * AMD Performance Monitor K7 and later. 120 + * AMD Performance Monitor K7 and later, up to and including Family 16h: 121 121 */ 122 122 static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = 123 123 { 124 - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, 125 - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 126 - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, 127 - [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, 128 - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, 129 - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, 130 - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ 131 - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ 124 + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, 125 + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 126 + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x077d, 127 + [PERF_COUNT_HW_CACHE_MISSES] = 0x077e, 128 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, 129 + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, 130 + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00d0, /* "Decoder empty" event */ 131 + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x00d1, /* "Dispatch stalls" event */ 132 + }; 133 + 134 + /* 135 + * AMD Performance Monitor Family 17h and later: 136 + */ 137 + static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = 138 + { 139 + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, 140 + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 141 + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, 142 + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, 143 + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, 144 + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x0287, 145 + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, 132 146 }; 133 147 134 148 static u64 amd_pmu_event_map(int hw_event) 135 149 { 150 + if (boot_cpu_data.x86 >= 0x17) 151 + return amd_f17h_perfmon_event_map[hw_event]; 152 + 136 153 return amd_perfmon_event_map[hw_event]; 137 154 } 138 155
+1 -1
arch/x86/events/intel/core.c
··· 3131 3131 flags &= ~PERF_SAMPLE_TIME; 3132 3132 if (!event->attr.exclude_kernel) 3133 3133 flags &= ~PERF_SAMPLE_REGS_USER; 3134 - if (event->attr.sample_regs_user & ~PEBS_REGS) 3134 + if (event->attr.sample_regs_user & ~PEBS_GP_REGS) 3135 3135 flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR); 3136 3136 return flags; 3137 3137 }
+19 -19
arch/x86/events/perf_event.h
··· 96 96 PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER | \ 97 97 PERF_SAMPLE_PERIOD) 98 98 99 - #define PEBS_REGS \ 100 - (PERF_REG_X86_AX | \ 101 - PERF_REG_X86_BX | \ 102 - PERF_REG_X86_CX | \ 103 - PERF_REG_X86_DX | \ 104 - PERF_REG_X86_DI | \ 105 - PERF_REG_X86_SI | \ 106 - PERF_REG_X86_SP | \ 107 - PERF_REG_X86_BP | \ 108 - PERF_REG_X86_IP | \ 109 - PERF_REG_X86_FLAGS | \ 110 - PERF_REG_X86_R8 | \ 111 - PERF_REG_X86_R9 | \ 112 - PERF_REG_X86_R10 | \ 113 - PERF_REG_X86_R11 | \ 114 - PERF_REG_X86_R12 | \ 115 - PERF_REG_X86_R13 | \ 116 - PERF_REG_X86_R14 | \ 117 - PERF_REG_X86_R15) 99 + #define PEBS_GP_REGS \ 100 + ((1ULL << PERF_REG_X86_AX) | \ 101 + (1ULL << PERF_REG_X86_BX) | \ 102 + (1ULL << PERF_REG_X86_CX) | \ 103 + (1ULL << PERF_REG_X86_DX) | \ 104 + (1ULL << PERF_REG_X86_DI) | \ 105 + (1ULL << PERF_REG_X86_SI) | \ 106 + (1ULL << PERF_REG_X86_SP) | \ 107 + (1ULL << PERF_REG_X86_BP) | \ 108 + (1ULL << PERF_REG_X86_IP) | \ 109 + (1ULL << PERF_REG_X86_FLAGS) | \ 110 + (1ULL << PERF_REG_X86_R8) | \ 111 + (1ULL << PERF_REG_X86_R9) | \ 112 + (1ULL << PERF_REG_X86_R10) | \ 113 + (1ULL << PERF_REG_X86_R11) | \ 114 + (1ULL << PERF_REG_X86_R12) | \ 115 + (1ULL << PERF_REG_X86_R13) | \ 116 + (1ULL << PERF_REG_X86_R14) | \ 117 + (1ULL << PERF_REG_X86_R15)) 118 118 119 119 /* 120 120 * Per register state.
+46 -2
arch/x86/kernel/kprobes/core.c
··· 569 569 unsigned long *sara = stack_addr(regs); 570 570 571 571 ri->ret_addr = (kprobe_opcode_t *) *sara; 572 + ri->fp = sara; 572 573 573 574 /* Replace the return addr with trampoline addr */ 574 575 *sara = (unsigned long) &kretprobe_trampoline; ··· 749 748 NOKPROBE_SYMBOL(kretprobe_trampoline); 750 749 STACK_FRAME_NON_STANDARD(kretprobe_trampoline); 751 750 751 + static struct kprobe kretprobe_kprobe = { 752 + .addr = (void *)kretprobe_trampoline, 753 + }; 754 + 752 755 /* 753 756 * Called from kretprobe_trampoline 754 757 */ 755 758 static __used void *trampoline_handler(struct pt_regs *regs) 756 759 { 760 + struct kprobe_ctlblk *kcb; 757 761 struct kretprobe_instance *ri = NULL; 758 762 struct hlist_head *head, empty_rp; 759 763 struct hlist_node *tmp; 760 764 unsigned long flags, orig_ret_address = 0; 761 765 unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; 762 766 kprobe_opcode_t *correct_ret_addr = NULL; 767 + void *frame_pointer; 768 + bool skipped = false; 769 + 770 + preempt_disable(); 771 + 772 + /* 773 + * Set a dummy kprobe for avoiding kretprobe recursion. 774 + * Since kretprobe never run in kprobe handler, kprobe must not 775 + * be running at this point. 776 + */ 777 + kcb = get_kprobe_ctlblk(); 778 + __this_cpu_write(current_kprobe, &kretprobe_kprobe); 779 + kcb->kprobe_status = KPROBE_HIT_ACTIVE; 763 780 764 781 INIT_HLIST_HEAD(&empty_rp); 765 782 kretprobe_hash_lock(current, &head, &flags); 766 783 /* fixup registers */ 767 784 #ifdef CONFIG_X86_64 768 785 regs->cs = __KERNEL_CS; 786 + /* On x86-64, we use pt_regs->sp for return address holder. */ 787 + frame_pointer = &regs->sp; 769 788 #else 770 789 regs->cs = __KERNEL_CS | get_kernel_rpl(); 771 790 regs->gs = 0; 791 + /* On x86-32, we use pt_regs->flags for return address holder. */ 792 + frame_pointer = &regs->flags; 772 793 #endif 773 794 regs->ip = trampoline_address; 774 795 regs->orig_ax = ~0UL; ··· 812 789 if (ri->task != current) 813 790 /* another task is sharing our hash bucket */ 814 791 continue; 792 + /* 793 + * Return probes must be pushed on this hash list correct 794 + * order (same as return order) so that it can be poped 795 + * correctly. However, if we find it is pushed it incorrect 796 + * order, this means we find a function which should not be 797 + * probed, because the wrong order entry is pushed on the 798 + * path of processing other kretprobe itself. 799 + */ 800 + if (ri->fp != frame_pointer) { 801 + if (!skipped) 802 + pr_warn("kretprobe is stacked incorrectly. Trying to fixup.\n"); 803 + skipped = true; 804 + continue; 805 + } 815 806 816 807 orig_ret_address = (unsigned long)ri->ret_addr; 808 + if (skipped) 809 + pr_warn("%ps must be blacklisted because of incorrect kretprobe order\n", 810 + ri->rp->kp.addr); 817 811 818 812 if (orig_ret_address != trampoline_address) 819 813 /* ··· 848 808 if (ri->task != current) 849 809 /* another task is sharing our hash bucket */ 850 810 continue; 811 + if (ri->fp != frame_pointer) 812 + continue; 851 813 852 814 orig_ret_address = (unsigned long)ri->ret_addr; 853 815 if (ri->rp && ri->rp->handler) { 854 816 __this_cpu_write(current_kprobe, &ri->rp->kp); 855 - get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; 856 817 ri->ret_addr = correct_ret_addr; 857 818 ri->rp->handler(ri, regs); 858 - __this_cpu_write(current_kprobe, NULL); 819 + __this_cpu_write(current_kprobe, &kretprobe_kprobe); 859 820 } 860 821 861 822 recycle_rp_inst(ri, &empty_rp); ··· 871 830 } 872 831 873 832 kretprobe_hash_unlock(current, &flags); 833 + 834 + __this_cpu_write(current_kprobe, NULL); 835 + preempt_enable(); 874 836 875 837 hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { 876 838 hlist_del(&ri->hlist);
+1
include/linux/kprobes.h
··· 173 173 struct kretprobe *rp; 174 174 kprobe_opcode_t *ret_addr; 175 175 struct task_struct *task; 176 + void *fp; 176 177 char data[0]; 177 178 }; 178 179
+21 -16
kernel/events/core.c
··· 9077 9077 if (task == TASK_TOMBSTONE) 9078 9078 return; 9079 9079 9080 - if (!ifh->nr_file_filters) 9081 - return; 9080 + if (ifh->nr_file_filters) { 9081 + mm = get_task_mm(event->ctx->task); 9082 + if (!mm) 9083 + goto restart; 9082 9084 9083 - mm = get_task_mm(event->ctx->task); 9084 - if (!mm) 9085 - goto restart; 9086 - 9087 - down_read(&mm->mmap_sem); 9085 + down_read(&mm->mmap_sem); 9086 + } 9088 9087 9089 9088 raw_spin_lock_irqsave(&ifh->lock, flags); 9090 9089 list_for_each_entry(filter, &ifh->list, entry) { 9091 - event->addr_filter_ranges[count].start = 0; 9092 - event->addr_filter_ranges[count].size = 0; 9090 + if (filter->path.dentry) { 9091 + /* 9092 + * Adjust base offset if the filter is associated to a 9093 + * binary that needs to be mapped: 9094 + */ 9095 + event->addr_filter_ranges[count].start = 0; 9096 + event->addr_filter_ranges[count].size = 0; 9093 9097 9094 - /* 9095 - * Adjust base offset if the filter is associated to a binary 9096 - * that needs to be mapped: 9097 - */ 9098 - if (filter->path.dentry) 9099 9098 perf_addr_filter_apply(filter, mm, &event->addr_filter_ranges[count]); 9099 + } else { 9100 + event->addr_filter_ranges[count].start = filter->offset; 9101 + event->addr_filter_ranges[count].size = filter->size; 9102 + } 9100 9103 9101 9104 count++; 9102 9105 } ··· 9107 9104 event->addr_filters_gen++; 9108 9105 raw_spin_unlock_irqrestore(&ifh->lock, flags); 9109 9106 9110 - up_read(&mm->mmap_sem); 9107 + if (ifh->nr_file_filters) { 9108 + up_read(&mm->mmap_sem); 9111 9109 9112 - mmput(mm); 9110 + mmput(mm); 9111 + } 9113 9112 9114 9113 restart: 9115 9114 perf_event_stop(event, 1);
+15 -18
kernel/events/ring_buffer.c
··· 455 455 rb->aux_head += size; 456 456 } 457 457 458 - if (size || handle->aux_flags) { 459 - /* 460 - * Only send RECORD_AUX if we have something useful to communicate 461 - * 462 - * Note: the OVERWRITE records by themselves are not considered 463 - * useful, as they don't communicate any *new* information, 464 - * aside from the short-lived offset, that becomes history at 465 - * the next event sched-in and therefore isn't useful. 466 - * The userspace that needs to copy out AUX data in overwrite 467 - * mode should know to use user_page::aux_head for the actual 468 - * offset. So, from now on we don't output AUX records that 469 - * have *only* OVERWRITE flag set. 470 - */ 471 - 472 - if (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE) 473 - perf_event_aux_event(handle->event, aux_head, size, 474 - handle->aux_flags); 475 - } 458 + /* 459 + * Only send RECORD_AUX if we have something useful to communicate 460 + * 461 + * Note: the OVERWRITE records by themselves are not considered 462 + * useful, as they don't communicate any *new* information, 463 + * aside from the short-lived offset, that becomes history at 464 + * the next event sched-in and therefore isn't useful. 465 + * The userspace that needs to copy out AUX data in overwrite 466 + * mode should know to use user_page::aux_head for the actual 467 + * offset. So, from now on we don't output AUX records that 468 + * have *only* OVERWRITE flag set. 469 + */ 470 + if (size || (handle->aux_flags & ~(u64)PERF_AUX_FLAG_OVERWRITE)) 471 + perf_event_aux_event(handle->event, aux_head, size, 472 + handle->aux_flags); 476 473 477 474 rb->user_page->aux_head = rb->aux_head; 478 475 if (rb_need_aux_wakeup(rb))
+2 -4
kernel/kprobes.c
··· 709 709 static int reuse_unused_kprobe(struct kprobe *ap) 710 710 { 711 711 struct optimized_kprobe *op; 712 - int ret; 713 712 714 713 /* 715 714 * Unused kprobe MUST be on the way of delayed unoptimizing (means ··· 719 720 /* Enable the probe again */ 720 721 ap->flags &= ~KPROBE_FLAG_DISABLED; 721 722 /* Optimize it again (remove from op->list) */ 722 - ret = kprobe_optready(ap); 723 - if (ret) 724 - return ret; 723 + if (!kprobe_optready(ap)) 724 + return -EINVAL; 725 725 726 726 optimize_kprobe(ap); 727 727 return 0;
+5 -1
kernel/trace/ftrace.c
··· 33 33 #include <linux/list.h> 34 34 #include <linux/hash.h> 35 35 #include <linux/rcupdate.h> 36 + #include <linux/kprobes.h> 36 37 37 38 #include <trace/events/sched.h> 38 39 ··· 6247 6246 tr->ops->func = ftrace_stub; 6248 6247 } 6249 6248 6250 - static inline void 6249 + static nokprobe_inline void 6251 6250 __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, 6252 6251 struct ftrace_ops *ignored, struct pt_regs *regs) 6253 6252 { ··· 6307 6306 { 6308 6307 __ftrace_ops_list_func(ip, parent_ip, NULL, regs); 6309 6308 } 6309 + NOKPROBE_SYMBOL(ftrace_ops_list_func); 6310 6310 #else 6311 6311 static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip) 6312 6312 { 6313 6313 __ftrace_ops_list_func(ip, parent_ip, NULL, NULL); 6314 6314 } 6315 + NOKPROBE_SYMBOL(ftrace_ops_no_ops); 6315 6316 #endif 6316 6317 6317 6318 /* ··· 6340 6337 preempt_enable_notrace(); 6341 6338 trace_clear_recursion(bit); 6342 6339 } 6340 + NOKPROBE_SYMBOL(ftrace_ops_assist_func); 6343 6341 6344 6342 /** 6345 6343 * ftrace_ops_get_func - get the function a trampoline should call
+1
tools/include/uapi/sound/asound.h
··· 32 32 33 33 #ifndef __KERNEL__ 34 34 #include <stdlib.h> 35 + #include <time.h> 35 36 #endif 36 37 37 38 /*
+1 -1
tools/lib/traceevent/event-parse.c
··· 2233 2233 return val & 0xffffffff; 2234 2234 2235 2235 if (strcmp(type, "u64") == 0 || 2236 - strcmp(type, "s64")) 2236 + strcmp(type, "s64") == 0) 2237 2237 return val; 2238 2238 2239 2239 if (strcmp(type, "s8") == 0)
+1
tools/perf/builtin-stat.c
··· 1308 1308 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++) 1309 1309 perf_header__set_feat(&session->header, feat); 1310 1310 1311 + perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1311 1312 perf_header__clear_feat(&session->header, HEADER_BUILD_ID); 1312 1313 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA); 1313 1314 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+1
tools/perf/builtin-top.c
··· 1377 1377 * */ 1378 1378 .overwrite = 0, 1379 1379 .sample_time = true, 1380 + .sample_time_set = true, 1380 1381 }, 1381 1382 .max_stack = sysctl__max_stack(), 1382 1383 .annotation_opts = annotation__default_options,
+1 -1
tools/perf/scripts/python/export-to-sqlite.py
··· 331 331 'return_id,' 332 332 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' 333 333 'parent_call_path_id,' 334 - 'parent_id' 334 + 'calls.parent_id' 335 335 ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') 336 336 337 337 do_query(query, 'CREATE VIEW samples_view AS '
+6 -2
tools/perf/util/env.c
··· 57 57 else if (prog_id > node->info_linear->info.id) 58 58 n = n->rb_right; 59 59 else 60 - break; 60 + goto out; 61 61 } 62 + node = NULL; 62 63 64 + out: 63 65 up_read(&env->bpf_progs.lock); 64 66 return node; 65 67 } ··· 111 109 else if (btf_id > node->id) 112 110 n = n->rb_right; 113 111 else 114 - break; 112 + goto out; 115 113 } 114 + node = NULL; 116 115 117 116 up_read(&env->bpf_progs.lock); 117 + out: 118 118 return node; 119 119 } 120 120
+9 -5
tools/perf/util/evlist.c
··· 1868 1868 { 1869 1869 struct perf_evlist *evlist = arg; 1870 1870 bool draining = false; 1871 - int i; 1871 + int i, done = 0; 1872 1872 1873 - while (draining || !(evlist->thread.done)) { 1874 - if (draining) 1875 - draining = false; 1876 - else if (evlist->thread.done) 1873 + while (!done) { 1874 + bool got_data = false; 1875 + 1876 + if (evlist->thread.done) 1877 1877 draining = true; 1878 1878 1879 1879 if (!draining) ··· 1894 1894 pr_warning("cannot locate proper evsel for the side band event\n"); 1895 1895 1896 1896 perf_mmap__consume(map); 1897 + got_data = true; 1897 1898 } 1898 1899 perf_mmap__read_done(map); 1899 1900 } 1901 + 1902 + if (draining && !got_data) 1903 + break; 1900 1904 } 1901 1905 return NULL; 1902 1906 }
+6 -6
tools/perf/util/evsel.c
··· 2368 2368 if (data->user_regs.abi) { 2369 2369 u64 mask = evsel->attr.sample_regs_user; 2370 2370 2371 - sz = hweight_long(mask) * sizeof(u64); 2371 + sz = hweight64(mask) * sizeof(u64); 2372 2372 OVERFLOW_CHECK(array, sz, max_size); 2373 2373 data->user_regs.mask = mask; 2374 2374 data->user_regs.regs = (u64 *)array; ··· 2424 2424 if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) { 2425 2425 u64 mask = evsel->attr.sample_regs_intr; 2426 2426 2427 - sz = hweight_long(mask) * sizeof(u64); 2427 + sz = hweight64(mask) * sizeof(u64); 2428 2428 OVERFLOW_CHECK(array, sz, max_size); 2429 2429 data->intr_regs.mask = mask; 2430 2430 data->intr_regs.regs = (u64 *)array; ··· 2552 2552 if (type & PERF_SAMPLE_REGS_USER) { 2553 2553 if (sample->user_regs.abi) { 2554 2554 result += sizeof(u64); 2555 - sz = hweight_long(sample->user_regs.mask) * sizeof(u64); 2555 + sz = hweight64(sample->user_regs.mask) * sizeof(u64); 2556 2556 result += sz; 2557 2557 } else { 2558 2558 result += sizeof(u64); ··· 2580 2580 if (type & PERF_SAMPLE_REGS_INTR) { 2581 2581 if (sample->intr_regs.abi) { 2582 2582 result += sizeof(u64); 2583 - sz = hweight_long(sample->intr_regs.mask) * sizeof(u64); 2583 + sz = hweight64(sample->intr_regs.mask) * sizeof(u64); 2584 2584 result += sz; 2585 2585 } else { 2586 2586 result += sizeof(u64); ··· 2710 2710 if (type & PERF_SAMPLE_REGS_USER) { 2711 2711 if (sample->user_regs.abi) { 2712 2712 *array++ = sample->user_regs.abi; 2713 - sz = hweight_long(sample->user_regs.mask) * sizeof(u64); 2713 + sz = hweight64(sample->user_regs.mask) * sizeof(u64); 2714 2714 memcpy(array, sample->user_regs.regs, sz); 2715 2715 array = (void *)array + sz; 2716 2716 } else { ··· 2746 2746 if (type & PERF_SAMPLE_REGS_INTR) { 2747 2747 if (sample->intr_regs.abi) { 2748 2748 *array++ = sample->intr_regs.abi; 2749 - sz = hweight_long(sample->intr_regs.mask) * sizeof(u64); 2749 + sz = hweight64(sample->intr_regs.mask) * sizeof(u64); 2750 2750 memcpy(array, sample->intr_regs.regs, sz); 2751 2751 array = (void *)array + sz; 2752 2752 } else {
+13 -9
tools/perf/util/header.c
··· 2606 2606 perf_env__insert_bpf_prog_info(env, info_node); 2607 2607 } 2608 2608 2609 + up_write(&env->bpf_progs.lock); 2609 2610 return 0; 2610 2611 out: 2611 2612 free(info_linear); ··· 2624 2623 static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) 2625 2624 { 2626 2625 struct perf_env *env = &ff->ph->env; 2626 + struct btf_node *node = NULL; 2627 2627 u32 count, i; 2628 + int err = -1; 2628 2629 2629 2630 if (ff->ph->needs_swap) { 2630 2631 pr_warning("interpreting btf from systems with endianity is not yet supported\n"); ··· 2639 2636 down_write(&env->bpf_progs.lock); 2640 2637 2641 2638 for (i = 0; i < count; ++i) { 2642 - struct btf_node *node; 2643 2639 u32 id, data_size; 2644 2640 2645 2641 if (do_read_u32(ff, &id)) 2646 - return -1; 2642 + goto out; 2647 2643 if (do_read_u32(ff, &data_size)) 2648 - return -1; 2644 + goto out; 2649 2645 2650 2646 node = malloc(sizeof(struct btf_node) + data_size); 2651 2647 if (!node) 2652 - return -1; 2648 + goto out; 2653 2649 2654 2650 node->id = id; 2655 2651 node->data_size = data_size; 2656 2652 2657 - if (__do_read(ff, node->data, data_size)) { 2658 - free(node); 2659 - return -1; 2660 - } 2653 + if (__do_read(ff, node->data, data_size)) 2654 + goto out; 2661 2655 2662 2656 perf_env__insert_btf(env, node); 2657 + node = NULL; 2663 2658 } 2664 2659 2660 + err = 0; 2661 + out: 2665 2662 up_write(&env->bpf_progs.lock); 2666 - return 0; 2663 + free(node); 2664 + return err; 2667 2665 } 2668 2666 2669 2667 struct feature_ops {
+17 -3
tools/perf/util/map.c
··· 261 261 return kmap && kmap->name[0]; 262 262 } 263 263 264 + bool __map__is_bpf_prog(const struct map *map) 265 + { 266 + const char *name; 267 + 268 + if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) 269 + return true; 270 + 271 + /* 272 + * If PERF_RECORD_BPF_EVENT is not included, the dso will not have 273 + * type of DSO_BINARY_TYPE__BPF_PROG_INFO. In such cases, we can 274 + * guess the type based on name. 275 + */ 276 + name = map->dso->short_name; 277 + return name && (strstr(name, "bpf_prog_") == name); 278 + } 279 + 264 280 bool map__has_symbols(const struct map *map) 265 281 { 266 282 return dso__has_symbols(map->dso); ··· 926 910 rc = strcmp(m->dso->short_name, map->dso->short_name); 927 911 if (rc < 0) 928 912 p = &(*p)->rb_left; 929 - else if (rc > 0) 930 - p = &(*p)->rb_right; 931 913 else 932 - return; 914 + p = &(*p)->rb_right; 933 915 } 934 916 rb_link_node(&map->rb_node_name, parent, p); 935 917 rb_insert_color(&map->rb_node_name, &maps->names);
+3 -1
tools/perf/util/map.h
··· 159 159 160 160 bool __map__is_kernel(const struct map *map); 161 161 bool __map__is_extra_kernel_map(const struct map *map); 162 + bool __map__is_bpf_prog(const struct map *map); 162 163 163 164 static inline bool __map__is_kmodule(const struct map *map) 164 165 { 165 - return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map); 166 + return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) && 167 + !__map__is_bpf_prog(map); 166 168 } 167 169 168 170 bool map__has_symbols(const struct map *map);