Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf record: Add ratio-to-prev term

Provide ratio-to-prev term which allows the user to
set the event sample period of two events corresponding
to a desired ratio.

When used on an Intel x86 platform with Auto Counter Reload support, also
set the corresponding events' config2 attribute to a bitmask indicating
which counters to reset and which counters to sample if the desired ratio
is met or exceeded.

On other platforms, only the sample period is affected by the
ratio-to-prev term.

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Thomas Falcon <thomas.falcon@intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dapeng Mi <dapeng1.mi@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Thomas Falcon and committed by
Arnaldo Carvalho de Melo
6b9c0261 584754cb

+212 -2
+53
tools/perf/Documentation/intel-acr.txt
··· 1 + Intel Auto Counter Reload Support 2 + --------------------------------- 3 + Support for Intel Auto Counter Reload in perf tools 4 + 5 + Auto counter reload provides a means for software to specify to hardware 6 + that certain counters, if supported, should be automatically reloaded 7 + upon overflow of chosen counters. By taking a sample only if the rate of 8 + one event exceeds some threshold relative to the rate of another event, 9 + this feature enables software to sample based on the relative rate of 10 + two or more events. To enable this, the user must provide a sample period 11 + term and a bitmask ("acr_mask") for each relevant event specifying the 12 + counters in an event group to reload if the event's specified sample 13 + period is exceeded. 14 + 15 + For example, if the user desires to measure a scenario when IPC > 2, 16 + the event group might look like the one below: 17 + 18 + perf record -e {cpu_atom/instructions,period=200000,acr_mask=0x2/, \ 19 + cpu_atom/cycles,period=100000,acr_mask=0x3/} -- true 20 + 21 + In this case, if the "instructions" counter exceeds the sample period of 22 + 200000, the second counter, "cycles", will be reset and a sample will be 23 + taken. If the "cycles" sample period is exceeded first, both counters in the group will be 24 + reset. In this way, samples will only be taken for cases where IPC > 2. 25 + 26 + The acr_mask term is a hexadecimal value representing a bitmask of the 27 + events in the group to be reset when the period is exceeded. In the 28 + example above, "instructions" is assigned an acr_mask of 0x2, meaning 29 + only the second event in the group is reloaded and a sample is taken 30 + for the first event. "cycles" is assigned an acr_mask of 0x3, meaning 31 + that both event counters will be reset if its sample period is exceeded 32 + first. 
33 + 34 + ratio-to-prev Event Term 35 + ------------------------ 36 + To simplify this, an event term "ratio-to-prev" is provided which is used 37 + alongside the sample period term or the -c/--count option. This 38 + allows users to specify the desired relative rate between events as a 39 + ratio. Note: Both events compared must belong to the same PMU. 40 + 41 + The command above would then become 42 + 43 + perf record -e {cpu_atom/instructions/, \ 44 + cpu_atom/cycles,period=100000,ratio-to-prev=0.5/} -- true 45 + 46 + ratio-to-prev is the ratio of the sample period of the event using the term 47 + to that of the previous event in the group, whose share is always 1; here 48 + that gives a previous:current ratio of 1:0.5, i.e. 2:1. 49 + 50 + To sample for IPC < 2 for example, the events need to be reordered: 51 + 52 + perf record -e {cpu_atom/cycles/, \ 53 + cpu_atom/instructions,period=200000,ratio-to-prev=2.0/} -- true
+2
tools/perf/Documentation/perf-list.txt
··· 393 393 . '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of 394 394 a certain kind of events. 395 395 396 + include::intel-acr.txt[] 397 + 396 398 SEE ALSO 397 399 -------- 398 400 linkperf:perf-stat[1], linkperf:perf-top[1],
+52
tools/perf/arch/x86/util/evsel.c
··· 4 4 #include <stdlib.h> 5 5 #include "util/evlist.h" 6 6 #include "util/evsel.h" 7 + #include "util/evsel_config.h" 7 8 #include "util/env.h" 8 9 #include "util/pmu.h" 9 10 #include "util/pmus.h" ··· 70 69 return scnprintf(bf, size, "%s/%s/", 71 70 evsel->pmu ? evsel->pmu->name : "cpu", 72 71 event_name); 72 + } 73 +
/*
 * arch_evsel__apply_ratio_to_prev() - x86 handling of the ratio-to-prev term:
 * fill in config2 (the "acr_mask" format) of @evsel's @attr and of the evsel
 * that precedes it.
 *
 * Returns early after a pr_err(), without writing either config2, when:
 *  - the PMU of @evsel has no "acr_mask" format,
 *  - that format is not of type PERF_PMU_FORMAT_VALUE_CONFIG2,
 *  - evsel__prev() yields no previous evsel, or
 *  - the previous event's config2 is already non-zero.
 *
 * Otherwise the previous event's config2 is set to the bit at @evsel's group
 * index, and @attr->config2 to that bit OR'ed with the bit one position below.
 *
 * NOTE(review): the lower bit is computed as 1ull << (evsel_idx - 1), which
 * presumes evsel__group_idx() returns >= 1 here (i.e. @evsel is not the group
 * leader) - confirm every caller guarantees that.
 */
74 + void arch_evsel__apply_ratio_to_prev(struct evsel *evsel, 75 + struct perf_event_attr *attr) 76 + { 77 + struct perf_event_attr *prev_attr = NULL; 78 + struct evsel *evsel_prev = NULL; 79 + const char *name = "acr_mask"; 80 + int evsel_idx = 0; 81 + __u64 ev_mask, pr_ev_mask; 82 + 83 + if (!perf_pmu__has_format(evsel->pmu, name)) { 84 + pr_err("'%s' does not have acr_mask format support\n", evsel->pmu->name); 85 + return; 86 + } 87 + if (perf_pmu__format_type(evsel->pmu, name) != 88 + PERF_PMU_FORMAT_VALUE_CONFIG2) { 89 + pr_err("'%s' does not have config2 format support\n", evsel->pmu->name); 90 + return; 91 + } 92 + 93 + evsel_prev = evsel__prev(evsel); 94 + if (!evsel_prev) { 95 + pr_err("Previous event does not exist.\n"); 96 + return; 97 + } 98 + 99 + prev_attr = &evsel_prev->core.attr; 100 + 101 + if (prev_attr->config2) { 102 + pr_err("'%s' has set config2 (acr_mask?) already, configuration not supported\n", evsel_prev->name); 103 + return; 104 + } 105 + 106 + /* 107 + * acr_mask (config2) is calculated using the event's index in 108 + * the event group. The first event will use the index of the 109 + * second event as its mask (e.g., 0x2), indicating that the 110 + * second event counter will be reset and a sample taken for 111 + * the first event if its counter overflows. The second event 112 + * will use the mask consisting of the first and second bits 113 + * (e.g., 0x3), meaning both counters will be reset if the 114 + * second event counter overflows. 
115 + */ 116 + 117 + evsel_idx = evsel__group_idx(evsel); 118 + ev_mask = 1ull << evsel_idx; 119 + pr_ev_mask = 1ull << (evsel_idx - 1); 120 + 121 + prev_attr->config2 = ev_mask; 122 + attr->config2 = ev_mask | pr_ev_mask; 73 123 } 74 124 75 125 static void ibs_l3miss_warn(void)
+76
tools/perf/util/evsel.c
··· 1092 1092 } 1093 1093 } 1094 1094
/*
 * evsel__apply_ratio_to_prev() - handle the "ratio-to-prev" config term.
 *
 * Parses the ratio from @buf with strtod() and links the sample period of
 * @evsel (through @attr) to the sample period of the previous evsel, then
 * calls arch_evsel__apply_ratio_to_prev() for arch-specific setup.
 *
 * Returns early after a pr_err(), changing nothing, when the ratio is <= 0,
 * @evsel is its own group leader, its PMU is not a core PMU, evsel__prev()
 * yields nothing, or the two events' PMU types differ.
 *
 * Which period is derived from which depends on the PERF_SAMPLE_PERIOD bit
 * in each event's sample_type:
 *  - previous event lacks the bit: @attr period = previous period * ratio;
 *  - else @evsel lacks the bit:    previous period = @attr period / ratio;
 *  - both have the bit: if opts->user_interval != ULLONG_MAX it becomes the
 *    previous period and @attr gets it multiplied by the ratio; otherwise an
 *    error is printed and nothing is changed.
 * Every attr whose period is written also gets freq = 0 and its PERIOD
 * sample bit reset via evsel__reset_sample_bit().
 *
 * NOTE(review): strtod() is called with a NULL end pointer, so trailing
 * characters in @buf go undetected - confirm the parser has validated it.
 * NOTE(review): evsel->pmu and evsel_prev->pmu are dereferenced without a
 * NULL check - confirm neither can be NULL on this path.
 */
1095 + static void evsel__apply_ratio_to_prev(struct evsel *evsel, 1096 + struct perf_event_attr *attr, 1097 + struct record_opts *opts, 1098 + const char *buf) 1099 + { 1100 + struct perf_event_attr *prev_attr = NULL; 1101 + struct evsel *evsel_prev = NULL; 1102 + u64 type = evsel->core.attr.sample_type; 1103 + u64 prev_type = 0; 1104 + double rtp; 1105 + 1106 + rtp = strtod(buf, NULL); 1107 + if (rtp <= 0) { 1108 + pr_err("Invalid ratio-to-prev value %lf\n", rtp); 1109 + return; 1110 + } 1111 + if (evsel == evsel__leader(evsel)) { 1112 + pr_err("Invalid use of ratio-to-prev term without preceding element in group\n"); 1113 + return; 1114 + } 1115 + if (!evsel->pmu->is_core) { 1116 + pr_err("Event using ratio-to-prev term must have a core PMU\n"); 1117 + return; 1118 + } 1119 + 1120 + evsel_prev = evsel__prev(evsel); 1121 + if (!evsel_prev) { 1122 + pr_err("Previous event does not exist.\n"); 1123 + return; 1124 + } 1125 + 1126 + if (evsel_prev->pmu->type != evsel->pmu->type) { 1127 + pr_err("Compared events (\"%s\", \"%s\") must have same PMU\n", 1128 + evsel->name, evsel_prev->name); 1129 + return; 1130 + } 1131 + 1132 + prev_attr = &evsel_prev->core.attr; 1133 + prev_type = evsel_prev->core.attr.sample_type; 1134 + 1135 + if (!(prev_type & PERF_SAMPLE_PERIOD)) { 1136 + attr->sample_period = prev_attr->sample_period * rtp; 1137 + attr->freq = 0; 1138 + evsel__reset_sample_bit(evsel, PERIOD); 1139 + } else if (!(type & PERF_SAMPLE_PERIOD)) { 1140 + prev_attr->sample_period = attr->sample_period / rtp; 1141 + prev_attr->freq = 0; 1142 + evsel__reset_sample_bit(evsel_prev, PERIOD); 1143 + } else { 1144 + if (opts->user_interval != ULLONG_MAX) { 1145 + prev_attr->sample_period = opts->user_interval; 1146 + attr->sample_period = prev_attr->sample_period * rtp; 1147 + prev_attr->freq = 0; 1148 + attr->freq = 0; 1149 + evsel__reset_sample_bit(evsel_prev, PERIOD); 1150 + evsel__reset_sample_bit(evsel, PERIOD); 1151 + } else { 1152 + 
pr_err("Event period term or count (-c) must be set when using ratio-to-prev term.\n"); 1153 + return; 1154 + } 1155 + } 1156 + 1157 + arch_evsel__apply_ratio_to_prev(evsel, attr); 1158 + } 1159 + 1095 1160 static void evsel__apply_config_terms(struct evsel *evsel, 1096 1161 struct record_opts *opts, bool track) 1097 1162 { ··· 1170 1105 u32 dump_size = 0; 1171 1106 int max_stack = 0; 1172 1107 const char *callgraph_buf = NULL; 1108 + const char *rtp_buf = NULL; 1173 1109 1174 1110 list_for_each_entry(term, config_terms, list) { 1175 1111 switch (term->type) { ··· 1241 1175 break; 1242 1176 case EVSEL__CONFIG_TERM_CFG_CHG: 1243 1177 break; 1178 + case EVSEL__CONFIG_TERM_RATIO_TO_PREV: 1179 + rtp_buf = term->val.str; 1180 + break; 1244 1181 default: 1245 1182 break; 1246 1183 } ··· 1295 1226 evsel__config_callchain(evsel, opts, &param); 1296 1227 } 1297 1228 } 1229 + if (rtp_buf) 1230 + evsel__apply_ratio_to_prev(evsel, attr, opts, rtp_buf); 1298 1231 } 1299 1232 1300 1233 struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type) ··· 1318 1247 1319 1248 void __weak arch__post_evsel_config(struct evsel *evsel __maybe_unused, 1320 1249 struct perf_event_attr *attr __maybe_unused) 1250 + { 1251 + } 1252 + 1253 + void __weak arch_evsel__apply_ratio_to_prev(struct evsel *evsel __maybe_unused, 1254 + struct perf_event_attr *attr __maybe_unused) 1321 1255 { 1322 1256 } 1323 1257
+1
tools/perf/util/evsel.h
··· 342 342 void arch_evsel__set_sample_weight(struct evsel *evsel); 343 343 void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr); 344 344 int arch_evsel__open_strerror(struct evsel *evsel, int err, char *msg, size_t size); 345 + void arch_evsel__apply_ratio_to_prev(struct evsel *evsel, struct perf_event_attr *attr); 345 346 346 347 int evsel__set_filter(struct evsel *evsel, const char *filter); 347 348 int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
+1
tools/perf/util/evsel_config.h
··· 28 28 EVSEL__CONFIG_TERM_AUX_ACTION, 29 29 EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE, 30 30 EVSEL__CONFIG_TERM_CFG_CHG, 31 + EVSEL__CONFIG_TERM_RATIO_TO_PREV, 31 32 }; 32 33 33 34 struct evsel_config_term {
+22
tools/perf/util/parse-events.c
··· 842 842 [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache", 843 843 [PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware", 844 844 [PARSE_EVENTS__TERM_TYPE_CPU] = "cpu", 845 + [PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV] = "ratio-to-prev", 845 846 }; 846 847 if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR) 847 848 return "unknown term"; ··· 893 892 case PARSE_EVENTS__TERM_TYPE_RAW: 894 893 case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: 895 894 case PARSE_EVENTS__TERM_TYPE_HARDWARE: 895 + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: 896 896 default: 897 897 if (!err) 898 898 return false; ··· 1047 1045 perf_cpu_map__put(map); 1048 1046 break; 1049 1047 } 1048 + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: 1049 + CHECK_TYPE_VAL(STR); 1050 + if (strtod(term->val.str, NULL) <= 0) { 1051 + parse_events_error__handle(parse_state->error, term->err_val, 1052 + strdup("zero or negative"), 1053 + NULL); 1054 + return -EINVAL; 1055 + } 1056 + if (errno == ERANGE) { 1057 + parse_events_error__handle(parse_state->error, term->err_val, 1058 + strdup("too big"), 1059 + NULL); 1060 + return -EINVAL; 1061 + } 1062 + break; 1050 1063 case PARSE_EVENTS__TERM_TYPE_DRV_CFG: 1051 1064 case PARSE_EVENTS__TERM_TYPE_USER: 1052 1065 case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: ··· 1190 1173 case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: 1191 1174 case PARSE_EVENTS__TERM_TYPE_HARDWARE: 1192 1175 case PARSE_EVENTS__TERM_TYPE_CPU: 1176 + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: 1193 1177 default: 1194 1178 parse_events_error__handle(parse_state->error, term->err_term, 1195 1179 strdup(parse_events__term_type_str(term->type_term)), ··· 1313 1295 ADD_CONFIG_TERM_VAL(AUX_SAMPLE_SIZE, aux_sample_size, 1314 1296 term->val.num, term->weak); 1315 1297 break; 1298 + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: 1299 + ADD_CONFIG_TERM_STR(RATIO_TO_PREV, term->val.str, term->weak); 1300 + break; 1316 1301 case PARSE_EVENTS__TERM_TYPE_USER: 1317 1302 case PARSE_EVENTS__TERM_TYPE_CONFIG: 1318 1303 case 
PARSE_EVENTS__TERM_TYPE_CONFIG1: ··· 1382 1361 case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: 1383 1362 case PARSE_EVENTS__TERM_TYPE_HARDWARE: 1384 1363 case PARSE_EVENTS__TERM_TYPE_CPU: 1364 + case PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: 1385 1365 default: 1386 1366 break; 1387 1367 }
+2 -1
tools/perf/util/parse-events.h
··· 83 83 PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, 84 84 PARSE_EVENTS__TERM_TYPE_HARDWARE, 85 85 PARSE_EVENTS__TERM_TYPE_CPU, 86 - #define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_CPU + 1) 86 + PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV, 87 + #define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV + 1) 87 88 }; 88 89 89 90 struct parse_events_term {
+1
tools/perf/util/parse-events.l
··· 337 337 aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } 338 338 metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } 339 339 cpu { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CPU); } 340 + ratio-to-prev { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV); } 340 341 cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); } 341 342 stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } 342 343 stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
+2 -1
tools/perf/util/pmu.c
··· 1541 1541 break; 1542 1542 case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */ 1543 1543 return -EINVAL; 1544 - case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_CPU: 1544 + case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_RATIO_TO_PREV: 1545 1545 /* Skip non-config terms. */ 1546 1546 break; 1547 1547 default: ··· 1930 1930 "aux-action=(pause|resume|start-paused)", 1931 1931 "aux-sample-size=number", 1932 1932 "cpu=number", 1933 + "ratio-to-prev=string", 1933 1934 }; 1934 1935 struct perf_pmu_format *format; 1935 1936 int ret;