Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-core-for-mingo-20161005' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

- Intel PT timestamp fixes (Adrian Hunter)

- Fix Intel JSON fixed counter conversions (Andi Kleen)

- Sync memcpy, cpufeatures and bpf headers with the kernel (Arnaldo Carvalho de Melo)

- Add some more tool tips (Donghyun Kim, Kim SeonYoung, Nambong Ha)

- Fix libtraceevent's kbuffer_read_at_offset() handling of offsets before or
equal to the first event (Namhyung Kim)

- Fix uretprobe probe placement on ppc64le (Ravi Bangoria)

- Support building C++ source files and add feature detection for g++,
as prep work for supporting a builtin clang/llvm, to remove the need for having
that toolchain installed to automagically build BPF scriptlets that then
get uploaded to the kernel via sys_bpf() (Wang Nan)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>

+85 -9
-1
tools/arch/x86/include/asm/cpufeatures.h
··· 106 106 #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ 107 107 #define X86_FEATURE_EAGER_FPU ( 3*32+29) /* "eagerfpu" Non lazy FPU restore */ 108 108 #define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ 109 - #define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */ 110 109 111 110 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 112 111 #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
+3 -3
tools/arch/x86/lib/memcpy_64.S
··· 181 181 182 182 #ifndef CONFIG_UML 183 183 /* 184 - * memcpy_mcsafe - memory copy with machine check exception handling 184 + * memcpy_mcsafe_unrolled - memory copy with machine check exception handling 185 185 * Note that we only catch machine checks when reading the source addresses. 186 186 * Writes to target are posted and don't generate machine checks. 187 187 */ 188 - ENTRY(memcpy_mcsafe) 188 + ENTRY(memcpy_mcsafe_unrolled) 189 189 cmpl $8, %edx 190 190 /* Less than 8 bytes? Go to byte copy loop */ 191 191 jb .L_no_whole_words ··· 273 273 .L_done_memcpy_trap: 274 274 xorq %rax, %rax 275 275 ret 276 - ENDPROC(memcpy_mcsafe) 276 + ENDPROC(memcpy_mcsafe_unrolled) 277 277 278 278 .section .fixup, "ax" 279 279 /* Return -EFAULT for any failure */
+1
tools/build/Build.include
··· 90 90 # - per object C flags 91 91 # - BUILD_STR macro to allow '-D"$(variable)"' constructs 92 92 c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj)) 93 + cxx_flags = -Wp,-MD,$(depfile),-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj)) 93 94 94 95 ### 95 96 ## HOSTCC C flags
+7
tools/build/Makefile.build
··· 61 61 quiet_cmd_host_cc_o_c = HOSTCC $@ 62 62 cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $< 63 63 64 + quiet_cmd_cxx_o_c = CXX $@ 65 + cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $< 66 + 64 67 quiet_cmd_cpp_i_c = CPP $@ 65 68 cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $< 66 69 ··· 90 87 $(OUTPUT)%.o: %.c FORCE 91 88 $(call rule_mkdir) 92 89 $(call if_changed_dep,$(host)cc_o_c) 90 + 91 + $(OUTPUT)%.o: %.cpp FORCE 92 + $(call rule_mkdir) 93 + $(call if_changed_dep,cxx_o_c) 93 94 94 95 $(OUTPUT)%.o: %.S FORCE 95 96 $(call rule_mkdir)
+1 -1
tools/build/Makefile.feature
··· 7 7 8 8 feature_check = $(eval $(feature_check_code)) 9 9 define feature_check_code 10 - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) 10 + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) 11 11 endef 12 12 13 13 feature_set = $(eval $(feature_set_code))
+9 -1
tools/build/feature/Makefile
··· 46 46 test-lzma.bin \ 47 47 test-bpf.bin \ 48 48 test-get_cpuid.bin \ 49 - test-sdt.bin 49 + test-sdt.bin \ 50 + test-cxx.bin 50 51 51 52 FILES := $(addprefix $(OUTPUT),$(FILES)) 52 53 53 54 CC := $(CROSS_COMPILE)gcc -MD 55 + CXX := $(CROSS_COMPILE)g++ -MD 54 56 PKG_CONFIG := $(CROSS_COMPILE)pkg-config 55 57 56 58 all: $(FILES) 57 59 58 60 __BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS) 59 61 BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1 62 + 63 + __BUILDXX = $(CXX) $(CXXFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS) 64 + BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1 60 65 61 66 ############################### 62 67 ··· 221 216 222 217 $(OUTPUT)test-sdt.bin: 223 218 $(BUILD) 219 + 220 + $(OUTPUT)test-cxx.bin: 221 + $(BUILDXX) -std=gnu++11 224 222 225 223 -include $(OUTPUT)*.d 226 224
+15
tools/build/feature/test-cxx.cpp
··· 1 + #include <iostream> 2 + #include <memory> 3 + 4 + static void print_str(std::string s) 5 + { 6 + std::cout << s << std::endl; 7 + } 8 + 9 + int main() 10 + { 11 + std::string s("Hello World!"); 12 + print_str(std::move(s)); 13 + std::cout << "|" << s << "|" << std::endl; 14 + return 0; 15 + }
+2 -2
tools/include/uapi/linux/bpf.h
··· 339 339 BPF_FUNC_skb_change_type, 340 340 341 341 /** 342 - * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb 342 + * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb 343 343 * @skb: pointer to skb 344 344 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type 345 345 * @index: index of the cgroup in the bpf_map ··· 348 348 * == 1 skb succeeded the cgroup2 descendant test 349 349 * < 0 error 350 350 */ 351 - BPF_FUNC_skb_in_cgroup, 351 + BPF_FUNC_skb_under_cgroup, 352 352 353 353 /** 354 354 * bpf_get_hash_recalc(skb)
+1
tools/lib/traceevent/kbuffer-parse.c
··· 622 622 623 623 /* Reset the buffer */ 624 624 kbuffer_load_subbuffer(kbuf, kbuf->subbuffer); 625 + data = kbuffer_read_event(kbuf, ts); 625 626 626 627 while (kbuf->curr < offset) { 627 628 data = kbuffer_next_event(kbuf, ts);
+4
tools/perf/Documentation/tips.txt
··· 28 28 See assembly instructions with percentage: perf annotate <symbol> 29 29 If you prefer Intel style assembly, try: perf annotate -M intel 30 30 For hierarchical output, try: perf report --hierarchy 31 + Order by the overhead of source file name and line number: perf report -s srcline 32 + System-wide collection from all CPUs: perf record -a 33 + Show current config key-value pairs: perf config --list 34 + Show user configuration overrides: perf config --user --list
+2 -1
tools/perf/arch/powerpc/util/sym-handling.c
··· 82 82 * 83 83 * In addition, we shouldn't specify an offset for kretprobes. 84 84 */ 85 - if (pev->point.offset || pev->point.retprobe || !map || !sym) 85 + if (pev->point.offset || (!pev->uprobes && pev->point.retprobe) || 86 + !map || !sym) 86 87 return; 87 88 88 89 lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
+2
tools/perf/pmu-events/jevents.c
··· 312 312 const char *event; 313 313 } fixed[] = { 314 314 { "inst_retired.any", "event=0xc0" }, 315 + { "inst_retired.any_p", "event=0xc0" }, 316 + { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03" }, 315 317 { "cpu_clk_unhalted.thread", "event=0x3c" }, 316 318 { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1" }, 317 319 { NULL, NULL},
+38
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
··· 90 90 bool pge; 91 91 bool have_tma; 92 92 bool have_cyc; 93 + bool fixup_last_mtc; 93 94 uint64_t pos; 94 95 uint64_t last_ip; 95 96 uint64_t ip; ··· 587 586 uint64_t tsc_timestamp; 588 587 uint64_t timestamp; 589 588 bool have_tma; 589 + bool fixup_last_mtc; 590 590 bool from_mtc; 591 591 double cbr_cyc_to_tsc; 592 592 }; 593 + 594 + /* 595 + * MTC provides a 8-bit slice of CTC but the TMA packet only provides the lower 596 + * 16 bits of CTC. If mtc_shift > 8 then some of the MTC bits are not in the CTC 597 + * provided by the TMA packet. Fix-up the last_mtc calculated from the TMA 598 + * packet by copying the missing bits from the current MTC assuming the least 599 + * difference between the two, and that the current MTC comes after last_mtc. 600 + */ 601 + static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift, 602 + uint32_t *last_mtc) 603 + { 604 + uint32_t first_missing_bit = 1U << (16 - mtc_shift); 605 + uint32_t mask = ~(first_missing_bit - 1); 606 + 607 + *last_mtc |= mtc & mask; 608 + if (*last_mtc >= mtc) { 609 + *last_mtc -= first_missing_bit; 610 + *last_mtc &= 0xff; 611 + } 612 + } 593 613 594 614 static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) 595 615 { ··· 641 619 return 0; 642 620 643 621 mtc = pkt_info->packet.payload; 622 + if (decoder->mtc_shift > 8 && data->fixup_last_mtc) { 623 + data->fixup_last_mtc = false; 624 + intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift, 625 + &data->last_mtc); 626 + } 644 627 if (mtc > data->last_mtc) 645 628 mtc_delta = mtc - data->last_mtc; 646 629 else ··· 714 687 715 688 data->ctc_delta = 0; 716 689 data->have_tma = true; 690 + data->fixup_last_mtc = true; 717 691 718 692 return 0; 719 693 ··· 781 753 .tsc_timestamp = decoder->tsc_timestamp, 782 754 .timestamp = decoder->timestamp, 783 755 .have_tma = decoder->have_tma, 756 + .fixup_last_mtc = decoder->fixup_last_mtc, 784 757 .from_mtc = from_mtc, 785 758 .cbr_cyc_to_tsc = 0, 786 759 }; ··· 1300 1271 } 1301 1272 
decoder->ctc_delta = 0; 1302 1273 decoder->have_tma = true; 1274 + decoder->fixup_last_mtc = true; 1303 1275 intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n", 1304 1276 decoder->ctc_timestamp, decoder->last_mtc, ctc_rem); 1305 1277 } ··· 1314 1284 return; 1315 1285 1316 1286 mtc = decoder->packet.payload; 1287 + 1288 + if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) { 1289 + decoder->fixup_last_mtc = false; 1290 + intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift, 1291 + &decoder->last_mtc); 1292 + } 1317 1293 1318 1294 if (mtc > decoder->last_mtc) 1319 1295 mtc_delta = mtc - decoder->last_mtc; ··· 1389 1353 timestamp, decoder->timestamp); 1390 1354 else 1391 1355 decoder->timestamp = timestamp; 1356 + 1357 + decoder->timestamp_insn_cnt = 0; 1392 1358 } 1393 1359 1394 1360 /* Walk PSB+ packets when already in sync. */