Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf stat: Make stats work over the thread dimension

Now that we have space for thread dimension counts, let's store it.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1435310967-14570-7-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Authored by Jiri Olsa; committed by Arnaldo Carvalho de Melo
a6fa0038 a8e02324

+38 -34
+18 -14
tools/perf/builtin-stat.c
··· 166 166 zfree(&evsel->priv); 167 167 } 168 168 169 - static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel) 169 + static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, 170 + int ncpus, int nthreads) 170 171 { 171 172 struct perf_counts *counts; 172 173 173 - counts = perf_counts__new(perf_evsel__nr_cpus(evsel)); 174 + counts = perf_counts__new(ncpus, nthreads); 174 175 if (counts) 175 176 evsel->prev_raw_counts = counts; 176 177 ··· 198 197 static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) 199 198 { 200 199 struct perf_evsel *evsel; 200 + int nthreads = thread_map__nr(evsel_list->threads); 201 201 202 202 evlist__for_each(evlist, evsel) { 203 + int ncpus = perf_evsel__nr_cpus(evsel); 204 + 203 205 if (perf_evsel__alloc_stat_priv(evsel) < 0 || 204 - perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 || 205 - (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0)) 206 + perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || 207 + (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) 206 208 goto out_free; 207 209 } 208 210 ··· 298 294 return 0; 299 295 } 300 296 301 - static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, 297 + static int read_cb(struct perf_evsel *evsel, int cpu, int thread, 302 298 struct perf_counts_values *count) 303 299 { 304 300 struct perf_counts_values *aggr = &evsel->counts->aggr; ··· 318 314 case AGGR_SOCKET: 319 315 case AGGR_NONE: 320 316 if (!evsel->snapshot) 321 - perf_evsel__compute_deltas(evsel, cpu, count); 317 + perf_evsel__compute_deltas(evsel, cpu, thread, count); 322 318 perf_counts_values__scale(count, scale, NULL); 323 - *perf_counts(evsel->counts, cpu) = *count; 319 + *perf_counts(evsel->counts, cpu, thread) = *count; 324 320 if (aggr_mode == AGGR_NONE) 325 321 perf_stat__update_shadow_stats(evsel, count->values, cpu); 326 322 break; ··· 356 352 return -1; 357 353 358 354 if (!counter->snapshot)
359 - perf_evsel__compute_deltas(counter, -1, aggr); 355 + perf_evsel__compute_deltas(counter, -1, -1, aggr); 360 356 perf_counts_values__scale(aggr, scale, &counter->counts->scaled); 361 357 362 358 for (i = 0; i < 3; i++) ··· 809 805 s2 = aggr_get_id(evsel_list->cpus, cpu2); 810 806 if (s2 != id) 811 807 continue; 812 - val += perf_counts(counter->counts, cpu)->val; 813 - ena += perf_counts(counter->counts, cpu)->ena; 814 - run += perf_counts(counter->counts, cpu)->run; 808 + val += perf_counts(counter->counts, cpu, 0)->val; 809 + ena += perf_counts(counter->counts, cpu, 0)->ena; 810 + run += perf_counts(counter->counts, cpu, 0)->run; 815 811 nr++; 816 812 } 817 813 if (prefix) ··· 919 915 int cpu; 920 916 921 917 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { 922 - val = perf_counts(counter->counts, cpu)->val; 923 - ena = perf_counts(counter->counts, cpu)->ena; 924 - run = perf_counts(counter->counts, cpu)->run; 918 + val = perf_counts(counter->counts, cpu, 0)->val; 919 + ena = perf_counts(counter->counts, cpu, 0)->ena; 920 + run = perf_counts(counter->counts, cpu, 0)->run; 925 921 926 922 if (prefix) 927 923 fprintf(output, "%s", prefix);
+3 -3
tools/perf/tests/openat-syscall-all-cpus.c
··· 78 78 * we use the auto allocation it will allocate just for 1 cpu, 79 79 * as we start by cpu 0. 80 80 */ 81 - if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) { 81 + if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) { 82 82 pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr); 83 83 goto out_close_fd; 84 84 } ··· 98 98 } 99 99 100 100 expected = nr_openat_calls + cpu; 101 - if (perf_counts(evsel->counts, cpu)->val != expected) { 101 + if (perf_counts(evsel->counts, cpu, 0)->val != expected) { 102 102 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", 103 - expected, cpus->map[cpu], perf_counts(evsel->counts, cpu)->val); 103 + expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val); 104 104 err = -1; 105 105 } 106 106 }
+2 -2
tools/perf/tests/openat-syscall.c
··· 44 44 goto out_close_fd; 45 45 } 46 46 47 - if (perf_counts(evsel->counts, 0)->val != nr_openat_calls) { 47 + if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) { 48 48 pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n", 49 - nr_openat_calls, perf_counts(evsel->counts, 0)->val); 49 + nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val); 50 50 goto out_close_fd; 51 51 } 52 52
+6 -6
tools/perf/util/evsel.c
··· 898 898 free(evsel); 899 899 } 900 900 901 - void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, 901 + void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread, 902 902 struct perf_counts_values *count) 903 903 { 904 904 struct perf_counts_values tmp; ··· 910 910 tmp = evsel->prev_raw_counts->aggr; 911 911 evsel->prev_raw_counts->aggr = *count; 912 912 } else { 913 - tmp = *perf_counts(evsel->prev_raw_counts, cpu); 914 - *perf_counts(evsel->prev_raw_counts, cpu) = *count; 913 + tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread); 914 + *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count; 915 915 } 916 916 917 917 count->val = count->val - tmp.val; ··· 964 964 if (FD(evsel, cpu, thread) < 0) 965 965 return -EINVAL; 966 966 967 - if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0) 967 + if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) 968 968 return -ENOMEM; 969 969 970 970 if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) 971 971 return -errno; 972 972 973 - perf_evsel__compute_deltas(evsel, cpu, &count); 973 + perf_evsel__compute_deltas(evsel, cpu, thread, &count); 974 974 perf_counts_values__scale(&count, scale, NULL); 975 - *perf_counts(evsel->counts, cpu) = count; 975 + *perf_counts(evsel->counts, cpu, thread) = count; 976 976 return 0; 977 977 } 978 978
+1 -1
tools/perf/util/evsel.h
··· 112 112 void perf_counts_values__scale(struct perf_counts_values *count, 113 113 bool scale, s8 *pscaled); 114 114 115 - void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, 115 + void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread, 116 116 struct perf_counts_values *count); 117 117 118 118 int perf_evsel__object_config(size_t object_size,
+4 -4
tools/perf/util/stat.c
··· 95 95 } 96 96 } 97 97 98 - struct perf_counts *perf_counts__new(int ncpus) 98 + struct perf_counts *perf_counts__new(int ncpus, int nthreads) 99 99 { 100 100 struct perf_counts *counts = zalloc(sizeof(*counts)); 101 101 102 102 if (counts) { 103 103 struct xyarray *cpu; 104 104 105 - cpu = xyarray__new(ncpus, 1, sizeof(struct perf_counts_values)); 105 + cpu = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values)); 106 106 if (!cpu) { 107 107 free(counts); 108 108 return NULL; ··· 132 132 perf_counts__reset(evsel->counts); 133 133 } 134 134 135 - int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) 135 + int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads) 136 136 { 137 - evsel->counts = perf_counts__new(ncpus); 137 + evsel->counts = perf_counts__new(ncpus, nthreads); 138 138 return evsel->counts != NULL ? 0 : -ENOMEM; 139 139 } 140 140
+4 -4
tools/perf/util/stat.h
··· 50 50 }; 51 51 52 52 static inline struct perf_counts_values* 53 - perf_counts(struct perf_counts *counts, int cpu) 53 + perf_counts(struct perf_counts *counts, int cpu, int thread) 54 54 { 55 - return xyarray__entry(counts->cpu, cpu, 0); 55 + return xyarray__entry(counts->cpu, cpu, thread); 56 56 } 57 57 58 58 void update_stats(struct stats *stats, u64 val); ··· 86 86 void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, 87 87 double avg, int cpu, enum aggr_mode aggr); 88 88 89 - struct perf_counts *perf_counts__new(int ncpus); 89 + struct perf_counts *perf_counts__new(int ncpus, int nthreads); 90 90 void perf_counts__delete(struct perf_counts *counts); 91 91 92 92 void perf_evsel__reset_counts(struct perf_evsel *evsel); 93 - int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); 93 + int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads); 94 94 void perf_evsel__free_counts(struct perf_evsel *evsel); 95 95 #endif