Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf tooling fixes from Thomas Gleixner:

- fix 'perf test Session topology' segfault on s390 (Thomas Richter)

- fix NULL return handling in bpf__prepare_load() (YueHaibing)

- fix indexing on CoreSight ETM packet queue decoder (Mathieu Poirier)

- fix perf.data format description of NRCPUS header (Arnaldo Carvalho
de Melo)

- update perf.data documentation section on cpu topology

- handle uncore event aliases in small groups properly (Kan Liang)

- add missing perf_sample.addr into python sample dictionary (Leo Yan)

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf tools: Fix perf.data format description of NRCPUS header
perf script python: Add addr into perf sample dict
perf data: Update documentation section on cpu topology
perf cs-etm: Fix indexing for decoder packet queue
perf bpf: Fix NULL return handling in bpf__prepare_load()
perf test: "Session topology" dumps core on s390
perf parse-events: Handle uncore event aliases in small groups properly

Changed files (+185 -21):
tools/perf/Documentation/perf.data-file-format.txt (+9 -1)
···
 A structure defining the number of CPUs.
 
 struct nr_cpus {
-	uint32_t nr_cpus_online;
 	uint32_t nr_cpus_available; /* CPUs not yet onlined */
+	uint32_t nr_cpus_online;
 };
 
 HEADER_CPUDESC = 8,
···
 HEADER_CPU_TOPOLOGY = 13,
 
 String lists defining the core and CPU threads topology.
+The string lists are followed by a variable length array
+which contains core_id and socket_id of each cpu.
+The number of entries can be determined by the size of the
+section minus the sizes of both string lists.
 
 struct {
 	struct perf_header_string_list cores; /* Variable length */
 	struct perf_header_string_list threads; /* Variable length */
+	struct {
+		uint32_t core_id;
+		uint32_t socket_id;
+	} cpus[nr]; /* Variable length records */
 };
 
 Example:
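The new documentation text implies a simple size calculation for readers of the format: the cpus[] record count is whatever remains of the HEADER_CPU_TOPOLOGY section after both string lists, divided by the record size. A minimal sketch of that arithmetic (the helper name and parameters are hypothetical, not from the patch):

	/*
	 * Sketch only: derive the number of cpus[] records in the
	 * HEADER_CPU_TOPOLOGY section, as the updated documentation
	 * describes. Names are illustrative.
	 */
	#include <stdint.h>
	#include <stddef.h>

	static size_t nr_topology_cpus(size_t section_size,
				       size_t cores_list_size,
				       size_t threads_list_size)
	{
		size_t tail = section_size - cores_list_size - threads_list_size;

		/* each record holds a uint32_t core_id and a uint32_t socket_id */
		return tail / (2 * sizeof(uint32_t));
	}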
tools/perf/tests/topology.c (+24 -6)
···
 	session = perf_session__new(&data, false, NULL);
 	TEST_ASSERT_VAL("can't get session", session);
 
+	/* On platforms with large numbers of CPUs process_cpu_topology()
+	 * might issue an error while reading the perf.data file section
+	 * HEADER_CPU_TOPOLOGY and the cpu_topology_map pointed to by member
+	 * cpu is a NULL pointer.
+	 * Example: On s390
+	 *   CPU 0 is on core_id 0 and physical_package_id 6
+	 *   CPU 1 is on core_id 1 and physical_package_id 3
+	 *
+	 * Core_id and physical_package_id are platform and architecture
+	 * dependend and might have higher numbers than the CPU id.
+	 * This actually depends on the configuration.
+	 *
+	 * In this case process_cpu_topology() prints error message:
+	 * "socket_id number is too big. You may need to upgrade the
+	 * perf tool."
+	 *
+	 * This is the reason why this test might be skipped.
+	 */
+	if (!session->header.env.cpu)
+		return TEST_SKIP;
+
 	for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
 		if (!cpu_map__has(map, i))
 			continue;
···
 {
 	char path[PATH_MAX];
 	struct cpu_map *map;
-	int ret = -1;
+	int ret = TEST_FAIL;
 
 	TEST_ASSERT_VAL("can't get templ file", !get_temp(path));
···
 		goto free_path;
 	}
 
-	if (check_cpu_topology(path, map))
-		goto free_map;
-	ret = 0;
-
-free_map:
+	ret = check_cpu_topology(path, map);
 	cpu_map__put(map);
+
 free_path:
 	unlink(path);
 	return ret;
tools/perf/util/bpf-loader.c (+3 -3)
···
 	}
 
 	obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name);
-	if (IS_ERR(obj)) {
+	if (IS_ERR_OR_NULL(obj)) {
 		pr_debug("bpf: failed to load buffer\n");
 		return ERR_PTR(-EINVAL);
 	}
···
 		pr_debug("bpf: successfull builtin compilation\n");
 		obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
 
-		if (!IS_ERR(obj) && llvm_param.dump_obj)
+		if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)
 			llvm__dump_obj(filename, obj_buf, obj_buf_sz);
 
 		free(obj_buf);
 	} else
 		obj = bpf_object__open(filename);
 
-	if (IS_ERR(obj)) {
+	if (IS_ERR_OR_NULL(obj)) {
 		pr_debug("bpf: failed to load %s\n", filename);
 		return obj;
 	}
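For context on why IS_ERR() alone was not enough here: bpf_object__open_buffer() can also return plain NULL, which is not in the encoded-errno pointer range that IS_ERR() tests, so the old check let a NULL through to be dereferenced later. A simplified sketch of the err.h convention the fix relies on (the real macros live in tools/include/linux/err.h and differ in minor details):

	#define MAX_ERRNO	4095

	/* true only for pointers encoding errno values -1..-MAX_ERRNO */
	#define IS_ERR_VALUE(x) ((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

	static inline int IS_ERR(const void *ptr)
	{
		return IS_ERR_VALUE((unsigned long)ptr);	/* NULL => false! */
	}

	static inline int IS_ERR_OR_NULL(const void *ptr)
	{
		return !ptr || IS_ERR_VALUE((unsigned long)ptr);
	}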
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c (+10 -2)
···
 	/* Nothing to do, might as well just return */
 	if (decoder->packet_count == 0)
 		return 0;
+	/*
+	 * The queueing process in function cs_etm_decoder__buffer_packet()
+	 * increments the tail *before* using it. This is somewhat counter
+	 * intuitive but it has the advantage of centralizing tail management
+	 * at a single location. Because of that we need to follow the same
+	 * heuristic with the head, i.e we increment it before using its
+	 * value. Otherwise the first element of the packet queue is not
+	 * used.
+	 */
+	decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
 
 	*packet = decoder->packet_buffer[decoder->head];
 
-	decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
-
 	decoder->packet_count--;
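The comment in the hunk above describes an "increment before use" convention that producer and consumer must share, otherwise slot 0 of the queue is never read. A standalone sketch of the pattern (hypothetical names; SLOTS stands in for MAX_BUFFER and must be a power of two for the mask to work):

	#define SLOTS 64	/* power of two, like MAX_BUFFER */

	struct ring {
		int head, tail, count;
		int slot[SLOTS];
	};

	static void push(struct ring *r, int v)
	{
		r->tail = (r->tail + 1) & (SLOTS - 1);	/* bump first... */
		r->slot[r->tail] = v;			/* ...then write */
		r->count++;
	}

	/* caller checks r->count first, as the decoder does */
	static int pop(struct ring *r)
	{
		r->head = (r->head + 1) & (SLOTS - 1);	/* mirror the producer */
		r->count--;
		return r->slot[r->head];
	}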
tools/perf/util/evsel.h (+1)
···
 	bool			precise_max;
 	bool			ignore_missing_thread;
 	bool			forced_leader;
+	bool			use_uncore_alias;
 	/* parse modifier helper */
 	int			exclude_GH;
 	int			nr_members;
tools/perf/util/parse-events.c (+127 -3)
···
 
 int parse_events_add_pmu(struct parse_events_state *parse_state,
 			 struct list_head *list, char *name,
-			 struct list_head *head_config, bool auto_merge_stats)
+			 struct list_head *head_config,
+			 bool auto_merge_stats,
+			 bool use_alias)
 {
 	struct perf_event_attr attr;
 	struct perf_pmu_info info;
 	struct perf_pmu *pmu;
 	struct perf_evsel *evsel;
 	struct parse_events_error *err = parse_state->error;
+	bool use_uncore_alias;
 	LIST_HEAD(config_terms);
 
 	pmu = perf_pmu__find(name);
···
 		memset(&attr, 0, sizeof(attr));
 	}
 
+	use_uncore_alias = (pmu->is_uncore && use_alias);
+
 	if (!head_config) {
 		attr.type = pmu->type;
 		evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats);
 		if (evsel) {
 			evsel->pmu_name = name;
+			evsel->use_uncore_alias = use_uncore_alias;
 			return 0;
 		} else {
 			return -ENOMEM;
···
 		evsel->metric_expr = info.metric_expr;
 		evsel->metric_name = info.metric_name;
 		evsel->pmu_name = name;
+		evsel->use_uncore_alias = use_uncore_alias;
 	}
 
 	return evsel ? 0 : -ENOMEM;
···
 		list_add_tail(&term->list, head);
 
 		if (!parse_events_add_pmu(parse_state, list,
-					  pmu->name, head, true)) {
+					  pmu->name, head,
+					  true, true)) {
 			pr_debug("%s -> %s/%s/\n", str,
 				 pmu->name, alias->str);
 			ok++;
···
 	return parse_events__modifier_event(list, event_mod, true);
 }
 
-void parse_events__set_leader(char *name, struct list_head *list)
+/*
+ * Check if the two uncore PMUs are from the same uncore block
+ * The format of the uncore PMU name is uncore_#blockname_#pmuidx
+ */
+static bool is_same_uncore_block(const char *pmu_name_a, const char *pmu_name_b)
+{
+	char *end_a, *end_b;
+
+	end_a = strrchr(pmu_name_a, '_');
+	end_b = strrchr(pmu_name_b, '_');
+
+	if (!end_a || !end_b)
+		return false;
+
+	if ((end_a - pmu_name_a) != (end_b - pmu_name_b))
+		return false;
+
+	return (strncmp(pmu_name_a, pmu_name_b, end_a - pmu_name_a) == 0);
+}
+
+static int
+parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list,
+					   struct parse_events_state *parse_state)
+{
+	struct perf_evsel *evsel, *leader;
+	uintptr_t *leaders;
+	bool is_leader = true;
+	int i, nr_pmu = 0, total_members, ret = 0;
+
+	leader = list_first_entry(list, struct perf_evsel, node);
+	evsel = list_last_entry(list, struct perf_evsel, node);
+	total_members = evsel->idx - leader->idx + 1;
+
+	leaders = calloc(total_members, sizeof(uintptr_t));
+	if (WARN_ON(!leaders))
+		return 0;
+
+	/*
+	 * Going through the whole group and doing sanity check.
+	 * All members must use alias, and be from the same uncore block.
+	 * Also, storing the leader events in an array.
+	 */
+	__evlist__for_each_entry(list, evsel) {
+
+		/* Only split the uncore group which members use alias */
+		if (!evsel->use_uncore_alias)
+			goto out;
+
+		/* The events must be from the same uncore block */
+		if (!is_same_uncore_block(leader->pmu_name, evsel->pmu_name))
+			goto out;
+
+		if (!is_leader)
+			continue;
+		/*
+		 * If the event's PMU name starts to repeat, it must be a new
+		 * event. That can be used to distinguish the leader from
+		 * other members, even they have the same event name.
+		 */
+		if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) {
+			is_leader = false;
+			continue;
+		}
+		/* The name is always alias name */
+		WARN_ON(strcmp(leader->name, evsel->name));
+
+		/* Store the leader event for each PMU */
+		leaders[nr_pmu++] = (uintptr_t) evsel;
+	}
+
+	/* only one event alias */
+	if (nr_pmu == total_members) {
+		parse_state->nr_groups--;
+		goto handled;
+	}
+
+	/*
+	 * An uncore event alias is a joint name which means the same event
+	 * runs on all PMUs of a block.
+	 * Perf doesn't support mixed events from different PMUs in the same
+	 * group. The big group has to be split into multiple small groups
+	 * which only include the events from the same PMU.
+	 *
+	 * Here the uncore event aliases must be from the same uncore block.
+	 * The number of PMUs must be same for each alias. The number of new
+	 * small groups equals to the number of PMUs.
+	 * Setting the leader event for corresponding members in each group.
+	 */
+	i = 0;
+	__evlist__for_each_entry(list, evsel) {
+		if (i >= nr_pmu)
+			i = 0;
+		evsel->leader = (struct perf_evsel *) leaders[i++];
+	}
+
+	/* The number of members and group name are same for each group */
+	for (i = 0; i < nr_pmu; i++) {
+		evsel = (struct perf_evsel *) leaders[i];
+		evsel->nr_members = total_members / nr_pmu;
+		evsel->group_name = name ? strdup(name) : NULL;
+	}
+
+	/* Take the new small groups into account */
+	parse_state->nr_groups += nr_pmu - 1;
+
+handled:
+	ret = 1;
+out:
+	free(leaders);
+	return ret;
+}
+
+void parse_events__set_leader(char *name, struct list_head *list,
+			      struct parse_events_state *parse_state)
 {
 	struct perf_evsel *leader;
 
···
 		WARN_ONCE(true, "WARNING: failed to set leader: empty list");
 		return;
 	}
+
+	if (parse_events__set_leader_for_uncore_aliase(name, list, parse_state))
+		return;
 
 	__perf_evlist__set_leader(list);
 	leader = list_entry(list->next, struct perf_evsel, node);
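To make the splitting rule concrete, here is a small self-contained demo of the name test that is_same_uncore_block() performs: two PMU names match when everything before the last '_' (the uncore_#blockname prefix) is identical, so only the per-block PMU index may differ. The sample names below merely follow that naming convention for illustration; expected output is noted in the comments:

	#include <stdio.h>
	#include <string.h>
	#include <stdbool.h>

	static bool same_block(const char *a, const char *b)
	{
		const char *end_a = strrchr(a, '_');
		const char *end_b = strrchr(b, '_');

		if (!end_a || !end_b || (end_a - a) != (end_b - b))
			return false;
		return strncmp(a, b, end_a - a) == 0;
	}

	int main(void)
	{
		/* same block, different PMU index -> group is split per PMU */
		printf("%d\n", same_block("uncore_cbox_0", "uncore_cbox_1")); /* 1 */
		/* different blocks -> the big group is left alone */
		printf("%d\n", same_block("uncore_cbox_0", "uncore_imc_0"));  /* 0 */
		return 0;
	}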
tools/perf/util/parse-events.h (+5 -2)
···
 				void *ptr, char *type, u64 len);
 int parse_events_add_pmu(struct parse_events_state *parse_state,
 			 struct list_head *list, char *name,
-			 struct list_head *head_config, bool auto_merge_stats);
+			 struct list_head *head_config,
+			 bool auto_merge_stats,
+			 bool use_alias);
 
 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 			       char *str,
···
 
 enum perf_pmu_event_symbol_type
 perf_pmu__parse_check(const char *name);
-void parse_events__set_leader(char *name, struct list_head *list);
+void parse_events__set_leader(char *name, struct list_head *list,
+			      struct parse_events_state *parse_state);
 void parse_events_update_lists(struct list_head *list_event,
 			       struct list_head *list_all);
 void parse_events_evlist_error(struct parse_events_state *parse_state,
tools/perf/util/parse-events.y (+4 -4)
···
 	struct list_head *list = $3;
 
 	inc_group_count(list, _parse_state);
-	parse_events__set_leader($1, list);
+	parse_events__set_leader($1, list, _parse_state);
 	$$ = list;
 }
 |
···
 	struct list_head *list = $2;
 
 	inc_group_count(list, _parse_state);
-	parse_events__set_leader(NULL, list);
+	parse_events__set_leader(NULL, list, _parse_state);
 	$$ = list;
 }
···
 		YYABORT;
 
 	ALLOC_LIST(list);
-	if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) {
+	if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) {
 		struct perf_pmu *pmu = NULL;
 		int ok = 0;
 		char *pattern;
···
 			free(pattern);
 			YYABORT;
 		}
-		if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true))
+		if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false))
 			ok++;
 		parse_events_terms__delete(terms);
 	}
tools/perf/util/scripting-engines/trace-event-python.c (+2)
···
 			PyLong_FromUnsignedLongLong(sample->period));
 	pydict_set_item_string_decref(dict_sample, "phys_addr",
 			PyLong_FromUnsignedLongLong(sample->phys_addr));
+	pydict_set_item_string_decref(dict_sample, "addr",
+			PyLong_FromUnsignedLongLong(sample->addr));
 	set_sample_read_in_dict(dict_sample, sample, evsel);
 	pydict_set_item_string_decref(dict, "sample", dict_sample);
 