Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf thread-stack: Add thread_stack__br_sample_late()

Add a thread stack function to create a branch stack for hardware events
where the sample records get created some time after the event occurred.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lore.kernel.org/lkml/20200429150751.12570-7-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Authored by Adrian Hunter and committed by Arnaldo Carvalho de Melo.
3749e0bb 6cd2cbfc

+107
+104
tools/perf/util/thread-stack.c
··· 645 645 } 646 646 } 647 647 648 + /* Start of user space branch entries */ 649 + static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start) 650 + { 651 + if (!*start) 652 + *start = be->to && be->to < kernel_start; 653 + 654 + return *start; 655 + } 656 + 657 + /* 658 + * Start of branch entries after the ip fell in between 2 branches, or user 659 + * space branch entries. 660 + */ 661 + static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start, 662 + bool *start, struct branch_entry *nb) 663 + { 664 + if (!*start) { 665 + *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) || 666 + be->from < kernel_start || 667 + (be->to && be->to < kernel_start); 668 + } 669 + 670 + return *start; 671 + } 672 + 673 + /* 674 + * Hardware sample records, created some time after the event occurred, need to 675 + * have subsequent addresses removed from the branch stack. 676 + */ 677 + void thread_stack__br_sample_late(struct thread *thread, int cpu, 678 + struct branch_stack *dst, unsigned int sz, 679 + u64 ip, u64 kernel_start) 680 + { 681 + struct thread_stack *ts = thread__stack(thread, cpu); 682 + struct branch_entry *d, *s, *spos, *ssz; 683 + struct branch_stack *src; 684 + unsigned int nr = 0; 685 + bool start = false; 686 + 687 + dst->nr = 0; 688 + 689 + if (!ts) 690 + return; 691 + 692 + src = ts->br_stack_rb; 693 + if (!src->nr) 694 + return; 695 + 696 + spos = &src->entries[ts->br_stack_pos]; 697 + ssz = &src->entries[ts->br_stack_sz]; 698 + 699 + d = &dst->entries[0]; 700 + s = spos; 701 + 702 + if (ip < kernel_start) { 703 + /* 704 + * User space sample: start copying branch entries when the 705 + * branch is in user space. 
706 + */ 707 + for (s = spos; s < ssz && nr < sz; s++) { 708 + if (us_start(s, kernel_start, &start)) { 709 + *d++ = *s; 710 + nr += 1; 711 + } 712 + } 713 + 714 + if (src->nr >= ts->br_stack_sz) { 715 + for (s = &src->entries[0]; s < spos && nr < sz; s++) { 716 + if (us_start(s, kernel_start, &start)) { 717 + *d++ = *s; 718 + nr += 1; 719 + } 720 + } 721 + } 722 + } else { 723 + struct branch_entry *nb = NULL; 724 + 725 + /* 726 + * Kernel space sample: start copying branch entries when the ip 727 + * falls in between 2 branches (or the branch is in user space 728 + * because then the start must have been missed). 729 + */ 730 + for (s = spos; s < ssz && nr < sz; s++) { 731 + if (ks_start(s, ip, kernel_start, &start, nb)) { 732 + *d++ = *s; 733 + nr += 1; 734 + } 735 + nb = s; 736 + } 737 + 738 + if (src->nr >= ts->br_stack_sz) { 739 + for (s = &src->entries[0]; s < spos && nr < sz; s++) { 740 + if (ks_start(s, ip, kernel_start, &start, nb)) { 741 + *d++ = *s; 742 + nr += 1; 743 + } 744 + nb = s; 745 + } 746 + } 747 + } 748 + 749 + dst->nr = nr; 750 + } 751 + 648 752 struct call_return_processor * 649 753 call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data), 650 754 void *data)
+3
tools/perf/util/thread-stack.h
··· 91 91 u64 kernel_start); 92 92 void thread_stack__br_sample(struct thread *thread, int cpu, 93 93 struct branch_stack *dst, unsigned int sz); 94 + void thread_stack__br_sample_late(struct thread *thread, int cpu, 95 + struct branch_stack *dst, unsigned int sz, 96 + u64 sample_ip, u64 kernel_start); 94 97 int thread_stack__flush(struct thread *thread); 95 98 void thread_stack__free(struct thread *thread); 96 99 size_t thread_stack__depth(struct thread *thread, int cpu);