Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
"Mostly tooling fixes, plus a static key fix fixing /sys/devices/cpu/rdpmc"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf tools: Really allow to specify custom CC, AR or LD
perf auxtrace: Fix misplaced check for HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT
perf hists browser: Take the --comm, --dsos, etc filters into account
perf symbols: Store if there is a filter in place
x86, perf: Fix static_key bug in load_mm_cr4()
tools: Copy lib/hweight.c from the kernel sources
perf tools: Fix the detached tarball wrt rbtree copy
perf thread_map: Fix the sizeof() calculation for map entries
tools lib: Improve clean target
perf stat: Fix shadow declaration of close
perf tools: Fix lockup using 32-bit compat vdso

Changed files
+101 -26
arch
x86
include
tools
+1 -1
arch/x86/include/asm/mmu_context.h
··· 23 24 static inline void load_mm_cr4(struct mm_struct *mm) 25 { 26 - if (static_key_true(&rdpmc_always_available) || 27 atomic_read(&mm->context.perf_rdpmc_allowed)) 28 cr4_set_bits(X86_CR4_PCE); 29 else
··· 23 24 static inline void load_mm_cr4(struct mm_struct *mm) 25 { 26 + if (static_key_false(&rdpmc_always_available) || 27 atomic_read(&mm->context.perf_rdpmc_allowed)) 28 cr4_set_bits(X86_CR4_PCE); 29 else
+1 -1
tools/lib/api/Makefile
··· 36 37 clean: 38 $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \ 39 - find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o | xargs $(RM) 40 41 FORCE: 42
··· 36 37 clean: 38 $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \ 39 + find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) 40 41 FORCE: 42
+62
tools/lib/hweight.c
···
··· 1 + #include <linux/bitops.h> 2 + #include <asm/types.h> 3 + 4 + /** 5 + * hweightN - returns the hamming weight of a N-bit word 6 + * @x: the word to weigh 7 + * 8 + * The Hamming Weight of a number is the total number of bits set in it. 9 + */ 10 + 11 + unsigned int __sw_hweight32(unsigned int w) 12 + { 13 + #ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER 14 + w -= (w >> 1) & 0x55555555; 15 + w = (w & 0x33333333) + ((w >> 2) & 0x33333333); 16 + w = (w + (w >> 4)) & 0x0f0f0f0f; 17 + return (w * 0x01010101) >> 24; 18 + #else 19 + unsigned int res = w - ((w >> 1) & 0x55555555); 20 + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); 21 + res = (res + (res >> 4)) & 0x0F0F0F0F; 22 + res = res + (res >> 8); 23 + return (res + (res >> 16)) & 0x000000FF; 24 + #endif 25 + } 26 + 27 + unsigned int __sw_hweight16(unsigned int w) 28 + { 29 + unsigned int res = w - ((w >> 1) & 0x5555); 30 + res = (res & 0x3333) + ((res >> 2) & 0x3333); 31 + res = (res + (res >> 4)) & 0x0F0F; 32 + return (res + (res >> 8)) & 0x00FF; 33 + } 34 + 35 + unsigned int __sw_hweight8(unsigned int w) 36 + { 37 + unsigned int res = w - ((w >> 1) & 0x55); 38 + res = (res & 0x33) + ((res >> 2) & 0x33); 39 + return (res + (res >> 4)) & 0x0F; 40 + } 41 + 42 + unsigned long __sw_hweight64(__u64 w) 43 + { 44 + #if BITS_PER_LONG == 32 45 + return __sw_hweight32((unsigned int)(w >> 32)) + 46 + __sw_hweight32((unsigned int)w); 47 + #elif BITS_PER_LONG == 64 48 + #ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER 49 + w -= (w >> 1) & 0x5555555555555555ul; 50 + w = (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul); 51 + w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful; 52 + return (w * 0x0101010101010101ul) >> 56; 53 + #else 54 + __u64 res = w - ((w >> 1) & 0x5555555555555555ul); 55 + res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul); 56 + res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful; 57 + res = res + (res >> 8); 58 + res = res + (res >> 16); 59 + return (res + (res >> 32)) & 0x00000000000000FFul; 60 + #endif 61 + #endif 62 + }
+1 -1
tools/lib/traceevent/Makefile
··· 268 269 clean: 270 $(call QUIET_CLEAN, libtraceevent) \ 271 - $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d \ 272 $(RM) TRACEEVENT-CFLAGS tags TAGS 273 274 PHONY += force plugins
··· 268 269 clean: 270 $(call QUIET_CLEAN, libtraceevent) \ 271 + $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \ 272 $(RM) TRACEEVENT-CFLAGS tags TAGS 273 274 PHONY += force plugins
+1 -1
tools/perf/MANIFEST
··· 18 tools/arch/x86/include/asm/rmwcc.h 19 tools/lib/traceevent 20 tools/lib/api 21 tools/lib/rbtree.c 22 tools/lib/symbol/kallsyms.c 23 tools/lib/symbol/kallsyms.h ··· 58 include/linux/list.h 59 include/linux/hash.h 60 include/linux/stringify.h 61 - lib/hweight.c 62 include/linux/swab.h 63 arch/*/include/asm/unistd*.h 64 arch/*/include/uapi/asm/unistd*.h
··· 18 tools/arch/x86/include/asm/rmwcc.h 19 tools/lib/traceevent 20 tools/lib/api 21 + tools/lib/hweight.c 22 tools/lib/rbtree.c 23 tools/lib/symbol/kallsyms.c 24 tools/lib/symbol/kallsyms.h ··· 57 include/linux/list.h 58 include/linux/hash.h 59 include/linux/stringify.h 60 include/linux/swab.h 61 arch/*/include/asm/unistd*.h 62 arch/*/include/uapi/asm/unistd*.h
+16 -3
tools/perf/Makefile.perf
··· 109 $(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) 110 $(Q)touch $(OUTPUT)PERF-VERSION-FILE 111 112 - CC = $(CROSS_COMPILE)gcc 113 - LD ?= $(CROSS_COMPILE)ld 114 - AR = $(CROSS_COMPILE)ar 115 PKG_CONFIG = $(CROSS_COMPILE)pkg-config 116 117 RM = rm -f
··· 109 $(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) 110 $(Q)touch $(OUTPUT)PERF-VERSION-FILE 111 112 + # Makefiles suck: This macro sets a default value of $(2) for the 113 + # variable named by $(1), unless the variable has been set by 114 + # environment or command line. This is necessary for CC and AR 115 + # because make sets default values, so the simpler ?= approach 116 + # won't work as expected. 117 + define allow-override 118 + $(if $(or $(findstring environment,$(origin $(1))),\ 119 + $(findstring command line,$(origin $(1)))),,\ 120 + $(eval $(1) = $(2))) 121 + endef 122 + 123 + # Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix. 124 + $(call allow-override,CC,$(CROSS_COMPILE)gcc) 125 + $(call allow-override,AR,$(CROSS_COMPILE)ar) 126 + $(call allow-override,LD,$(CROSS_COMPILE)ld) 127 + 128 PKG_CONFIG = $(CROSS_COMPILE)pkg-config 129 130 RM = rm -f
+2 -2
tools/perf/builtin-stat.c
··· 343 return 0; 344 } 345 346 - static void read_counters(bool close) 347 { 348 struct perf_evsel *counter; 349 ··· 354 if (process_counter(counter)) 355 pr_warning("failed to process counter %s\n", counter->name); 356 357 - if (close) { 358 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 359 thread_map__nr(evsel_list->threads)); 360 }
··· 343 return 0; 344 } 345 346 + static void read_counters(bool close_counters) 347 { 348 struct perf_evsel *counter; 349 ··· 354 if (process_counter(counter)) 355 pr_warning("failed to process counter %s\n", counter->name); 356 357 + if (close_counters) { 358 perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 359 thread_map__nr(evsel_list->threads)); 360 }
+1 -1
tools/perf/ui/browsers/hists.c
··· 48 49 static bool hist_browser__has_filter(struct hist_browser *hb) 50 { 51 - return hists__has_filter(hb->hists) || hb->min_pcnt; 52 } 53 54 static int hist_browser__get_folding(struct hist_browser *browser)
··· 48 49 static bool hist_browser__has_filter(struct hist_browser *hb) 50 { 51 + return hists__has_filter(hb->hists) || hb->min_pcnt || symbol_conf.has_filter; 52 } 53 54 static int hist_browser__get_folding(struct hist_browser *browser)
+1 -1
tools/perf/util/Build
··· 143 $(call rule_mkdir) 144 $(call if_changed_dep,cc_o_c) 145 146 - $(OUTPUT)util/hweight.o: ../../lib/hweight.c FORCE 147 $(call rule_mkdir) 148 $(call if_changed_dep,cc_o_c)
··· 143 $(call rule_mkdir) 144 $(call if_changed_dep,cc_o_c) 145 146 + $(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE 147 $(call rule_mkdir) 148 $(call if_changed_dep,cc_o_c)
+5 -5
tools/perf/util/auxtrace.c
··· 53 { 54 struct perf_event_mmap_page *pc = userpg; 55 56 - #if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) 57 - pr_err("Cannot use AUX area tracing mmaps\n"); 58 - return -1; 59 - #endif 60 - 61 WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n"); 62 63 mm->userpg = userpg; ··· 67 mm->base = NULL; 68 return 0; 69 } 70 71 pc->aux_offset = mp->offset; 72 pc->aux_size = mp->len;
··· 53 { 54 struct perf_event_mmap_page *pc = userpg; 55 56 WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n"); 57 58 mm->userpg = userpg; ··· 72 mm->base = NULL; 73 return 0; 74 } 75 + 76 + #if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) 77 + pr_err("Cannot use AUX area tracing mmaps\n"); 78 + return -1; 79 + #endif 80 81 pc->aux_offset = mp->offset; 82 pc->aux_size = mp->len;
+2 -2
tools/perf/util/python-ext-sources
··· 10 util/evlist.c 11 util/evsel.c 12 util/cpumap.c 13 - ../../lib/hweight.c 14 util/thread_map.c 15 util/util.c 16 util/xyarray.c ··· 19 util/stat.c 20 util/strlist.c 21 util/trace-event.c 22 - ../../lib/rbtree.c 23 util/string.c
··· 10 util/evlist.c 11 util/evsel.c 12 util/cpumap.c 13 + ../lib/hweight.c 14 util/thread_map.c 15 util/util.c 16 util/xyarray.c ··· 19 util/stat.c 20 util/strlist.c 21 util/trace-event.c 22 + ../lib/rbtree.c 23 util/string.c
+2
tools/perf/util/symbol.c
··· 1911 pr_err("problems parsing %s list\n", list_name); 1912 return -1; 1913 } 1914 return 0; 1915 } 1916
··· 1911 pr_err("problems parsing %s list\n", list_name); 1912 return -1; 1913 } 1914 + 1915 + symbol_conf.has_filter = true; 1916 return 0; 1917 } 1918
+2 -1
tools/perf/util/symbol.h
··· 105 demangle_kernel, 106 filter_relative, 107 show_hist_headers, 108 - branch_callstack; 109 const char *vmlinux_name, 110 *kallsyms_name, 111 *source_prefix,
··· 105 demangle_kernel, 106 filter_relative, 107 show_hist_headers, 108 + branch_callstack, 109 + has_filter; 110 const char *vmlinux_name, 111 *kallsyms_name, 112 *source_prefix,
+1 -2
tools/perf/util/thread_map.c
··· 136 if (grow) { 137 struct thread_map *tmp; 138 139 - tmp = realloc(threads, (sizeof(*threads) + 140 - max_threads * sizeof(pid_t))); 141 if (tmp == NULL) 142 goto out_free_namelist; 143
··· 136 if (grow) { 137 struct thread_map *tmp; 138 139 + tmp = thread_map__realloc(threads, max_threads); 140 if (tmp == NULL) 141 goto out_free_namelist; 142
+3 -5
tools/perf/util/vdso.c
··· 236 const char *file_name; 237 struct dso *dso; 238 239 - pthread_rwlock_wrlock(&machine->dsos.lock); 240 dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true); 241 if (dso) 242 - goto out_unlock; 243 244 file_name = vdso__get_compat_file(vdso_file); 245 if (!file_name) 246 - goto out_unlock; 247 248 dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name); 249 - out_unlock: 250 - pthread_rwlock_unlock(&machine->dsos.lock); 251 return dso; 252 } 253
··· 236 const char *file_name; 237 struct dso *dso; 238 239 dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true); 240 if (dso) 241 + goto out; 242 243 file_name = vdso__get_compat_file(vdso_file); 244 if (!file_name) 245 + goto out; 246 247 dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name); 248 + out: 249 return dso; 250 } 251