Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf/x86/intel: Add Haswell PEBS record support

Add support for the Haswell extended (fmt2) PEBS format.

It has a superset of the nhm (fmt1) PEBS fields, but has a
longer record so we need to adjust the code paths.

The main advantage is the new "EventingRip" support which
directly gives the instruction, not off-by-one instruction. So
with precise == 2 we use that directly and don't try to use LBRs
and walking basic blocks. This lowers the overhead of using
precise significantly.

Some other features are added in later patches.

Reviewed-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Andi Kleen <ak@linux.jf.intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: http://lkml.kernel.org/r/1371515812-9646-2-git-send-email-andi@firstfloor.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by: Andi Kleen
Committed by: Ingo Molnar
Commit: 130768b8 (parent: b2fa344d)

Total: +91 −22 (2 files changed)

arch/x86/kernel/cpu/perf_event.c (+2 −1)

···
	 * check that PEBS LBR correction does not conflict with
	 * whatever the user is asking with attr->branch_sample_type
	 */
-	if (event->attr.precise_ip > 1) {
+	if (event->attr.precise_ip > 1 &&
+	    x86_pmu.intel_cap.pebs_format < 2) {
		u64 *br_type = &event->attr.branch_sample_type;

		if (has_branch_stack(event)) {
···
arch/x86/kernel/cpu/perf_event_intel_ds.c (+89 −21)

···
	u64 status, dla, dse, lat;
 };

+/*
+ * Same as pebs_record_nhm, with two additional fields.
+ */
+struct pebs_record_hsw {
+	struct pebs_record_nhm nhm;
+	/*
+	 * Real IP of the event. In the Intel documentation this
+	 * is called eventingrip.
+	 */
+	u64 real_ip;
+	/*
+	 * TSX tuning information field: abort cycles and abort flags.
+	 */
+	u64 tsx_tuning;
+};
+
 void init_debug_store_on_cpu(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
···
	 */
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct pebs_record_nhm *pebs = __pebs;
+	struct pebs_record_hsw *pebs_hsw = __pebs;
	struct perf_sample_data data;
	struct pt_regs regs;
	u64 sample_type;
···
	regs.bp = pebs->bp;
	regs.sp = pebs->sp;

-	if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
+	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
+		regs.ip = pebs_hsw->real_ip;
+		regs.flags |= PERF_EFLAGS_EXACT;
+	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
		regs.flags |= PERF_EFLAGS_EXACT;
	else
		regs.flags &= ~PERF_EFLAGS_EXACT;
···
		__intel_pmu_pebs_event(event, iregs, at);
 }

-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
+				       void *top)
 {
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	struct debug_store *ds = cpuc->ds;
-	struct pebs_record_nhm *at, *top;
	struct perf_event *event = NULL;
	u64 status = 0;
-	int bit, n;
-
-	if (!x86_pmu.pebs_active)
-		return;
-
-	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+	int bit;

	ds->pebs_index = ds->pebs_buffer_base;

-	n = top - at;
-	if (n <= 0)
-		return;
+	for (; at < top; at += x86_pmu.pebs_record_size) {
+		struct pebs_record_nhm *p = at;

-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n);
-
-	for ( ; at < top; at++) {
-		for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) {
+		for_each_set_bit(bit, (unsigned long *)&p->status,
+				 x86_pmu.max_pebs_events) {
			event = cpuc->events[bit];
			if (!test_bit(bit, cpuc->active_mask))
				continue;
···
		__intel_pmu_pebs_event(event, iregs, at);
	}
+}
+
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct pebs_record_nhm *at, *top;
+	int n;
+
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
+	ds->pebs_index = ds->pebs_buffer_base;
+
+	n = top - at;
+	if (n <= 0)
+		return;
+
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(n > x86_pmu.max_pebs_events,
+		  "Unexpected number of pebs records %d\n", n);
+
+	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
+}
+
+static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct pebs_record_hsw *at, *top;
+	int n;
+
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
+
+	n = top - at;
+	if (n <= 0)
+		return;
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(n > x86_pmu.max_pebs_events,
+		  "Unexpected number of pebs records %d\n", n);
+
+	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
 }

 /*
···
		printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
		x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
		x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+		break;
+
+	case 2:
+		pr_cont("PEBS fmt2%c, ", pebs_type);
+		x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
+		x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
		break;

	default: