Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf report: Add parallelism filter

Add a parallelism filter that can be used to look at specific parallelism
levels only. The format is the same as for CPU lists. For example:

Only single-threaded samples: --parallelism=1
Low parallelism only: --parallelism=1-4
High parallelism only: --parallelism=64-128

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Link: https://lore.kernel.org/r/e61348985ff0a6a14b07c39e880edbd60a8f8635.1739437531.git.dvyukov@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>

Authored by Dmitry Vyukov and committed by Namhyung Kim
61b6b31c 216f8a97

+91 -2
+4 -1
tools/perf/builtin-report.c
··· 1390 1390 symbol__config_symfs), 1391 1391 OPT_STRING('C', "cpu", &report.cpu_list, "cpu", 1392 1392 "list of cpus to profile"), 1393 + OPT_STRING(0, "parallelism", &symbol_conf.parallelism_list_str, "parallelism", 1394 + "only consider these parallelism levels (cpu set format)"), 1393 1395 OPT_BOOLEAN('I', "show-info", &report.show_full_info, 1394 1396 "Display extended information about perf.data file"), 1395 1397 OPT_BOOLEAN(0, "source", &annotate_opts.annotate_src, ··· 1723 1721 } 1724 1722 1725 1723 if (report.disable_order || !perf_session__has_switch_events(session)) { 1726 - if ((sort_order && strstr(sort_order, "parallelism")) || 1724 + if (symbol_conf.parallelism_list_str || 1725 + (sort_order && strstr(sort_order, "parallelism")) || 1727 1726 (field_order && strstr(field_order, "parallelism"))) { 1728 1727 if (report.disable_order) 1729 1728 ui__error("Use of parallelism is incompatible with --disable-order.\n");
+2
tools/perf/util/event.c
··· 769 769 770 770 /* Account for possible out-of-order switch events. */ 771 771 al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine))); 772 + if (test_bit(al->parallelism, symbol_conf.parallelism_filter)) 773 + al->filtered |= (1 << HIST_FILTER__PARALLELISM); 772 774 773 775 if (al->map) { 774 776 if (symbol_conf.dso_list &&
+31
tools/perf/util/hist.c
··· 43 43 struct hist_entry *he); 44 44 static bool hists__filter_entry_by_socket(struct hists *hists, 45 45 struct hist_entry *he); 46 + static bool hists__filter_entry_by_parallelism(struct hists *hists, 47 + struct hist_entry *he); 46 48 47 49 u16 hists__col_len(struct hists *hists, enum hist_column col) 48 50 { ··· 1459 1457 if (symbol_conf.sym_list == NULL) 1460 1458 return; 1461 1459 break; 1460 + case HIST_FILTER__PARALLELISM: 1461 + if (__bitmap_weight(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1) == 0) 1462 + return; 1463 + break; 1462 1464 case HIST_FILTER__PARENT: 1463 1465 case HIST_FILTER__GUEST: 1464 1466 case HIST_FILTER__HOST: ··· 1520 1514 1521 1515 hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL, 1522 1516 perf_hpp__is_sym_entry); 1517 + 1518 + hist_entry__check_and_remove_filter(he, HIST_FILTER__PARALLELISM, 1519 + perf_hpp__is_parallelism_entry); 1523 1520 1524 1521 hists__apply_filters(he->hists, he); 1525 1522 } ··· 1720 1711 hists__filter_entry_by_thread(hists, he); 1721 1712 hists__filter_entry_by_symbol(hists, he); 1722 1713 hists__filter_entry_by_socket(hists, he); 1714 + hists__filter_entry_by_parallelism(hists, he); 1723 1715 } 1724 1716 1725 1717 int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) ··· 2207 2197 return false; 2208 2198 } 2209 2199 2200 + static bool hists__filter_entry_by_parallelism(struct hists *hists, 2201 + struct hist_entry *he) 2202 + { 2203 + if (test_bit(he->parallelism, hists->parallelism_filter)) { 2204 + he->filtered |= (1 << HIST_FILTER__PARALLELISM); 2205 + return true; 2206 + } 2207 + return false; 2208 + } 2209 + 2210 2210 typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he); 2211 2211 2212 2212 static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter) ··· 2384 2364 else 2385 2365 hists__filter_by_type(hists, HIST_FILTER__SOCKET, 2386 2366 hists__filter_entry_by_socket); 2367 + } 2368 + 2369 + void 
hists__filter_by_parallelism(struct hists *hists) 2370 + { 2371 + if (symbol_conf.report_hierarchy) 2372 + hists__filter_hierarchy(hists, HIST_FILTER__PARALLELISM, 2373 + hists->parallelism_filter); 2374 + else 2375 + hists__filter_by_type(hists, HIST_FILTER__PARALLELISM, 2376 + hists__filter_entry_by_parallelism); 2387 2377 } 2388 2378 2389 2379 void events_stats__inc(struct events_stats *stats, u32 type) ··· 2902 2872 hists->entries = RB_ROOT_CACHED; 2903 2873 mutex_init(&hists->lock); 2904 2874 hists->socket_filter = -1; 2875 + hists->parallelism_filter = symbol_conf.parallelism_filter; 2905 2876 hists->hpp_list = hpp_list; 2906 2877 INIT_LIST_HEAD(&hists->hpp_formats); 2907 2878 return 0;
+5 -1
tools/perf/util/hist.h
··· 31 31 HIST_FILTER__HOST, 32 32 HIST_FILTER__SOCKET, 33 33 HIST_FILTER__C2C, 34 + HIST_FILTER__PARALLELISM, 34 35 }; 35 36 36 37 typedef u16 filter_mask_t; ··· 113 112 const struct dso *dso_filter; 114 113 const char *uid_filter_str; 115 114 const char *symbol_filter_str; 115 + unsigned long *parallelism_filter; 116 116 struct mutex lock; 117 117 struct hists_stats stats; 118 118 u64 event_stream; ··· 390 388 void hists__filter_by_thread(struct hists *hists); 391 389 void hists__filter_by_symbol(struct hists *hists); 392 390 void hists__filter_by_socket(struct hists *hists); 391 + void hists__filter_by_parallelism(struct hists *hists); 393 392 394 393 static inline bool hists__has_filter(struct hists *hists) 395 394 { 396 395 return hists->thread_filter || hists->dso_filter || 397 - hists->symbol_filter_str || (hists->socket_filter > -1); 396 + hists->symbol_filter_str || (hists->socket_filter > -1) || 397 + hists->parallelism_filter; 398 398 } 399 399 400 400 u16 hists__col_len(struct hists *hists, enum hist_column col);
+11
tools/perf/util/sort.c
··· 900 900 return right->parallelism - left->parallelism; 901 901 } 902 902 903 + static int hist_entry__parallelism_filter(struct hist_entry *he, int type, const void *arg) 904 + { 905 + const unsigned long *parallelism_filter = arg; 906 + 907 + if (type != HIST_FILTER__PARALLELISM) 908 + return -1; 909 + 910 + return test_bit(he->parallelism, parallelism_filter); 911 + } 912 + 903 913 static int hist_entry__parallelism_snprintf(struct hist_entry *he, char *bf, 904 914 size_t size, unsigned int width) 905 915 { ··· 919 909 struct sort_entry sort_parallelism = { 920 910 .se_header = "Parallelism", 921 911 .se_cmp = sort__parallelism_cmp, 912 + .se_filter = hist_entry__parallelism_filter, 922 913 .se_snprintf = hist_entry__parallelism_snprintf, 923 914 .se_width_idx = HISTC_PARALLELISM, 924 915 };
+34
tools/perf/util/symbol.c
··· 18 18 #include "annotate.h" 19 19 #include "build-id.h" 20 20 #include "cap.h" 21 + #include "cpumap.h" 21 22 #include "dso.h" 22 23 #include "util.h" // lsdir() 23 24 #include "debug.h" ··· 2472 2471 return 0; 2473 2472 } 2474 2473 2474 + static int setup_parallelism_bitmap(void) 2475 + { 2476 + struct perf_cpu_map *map; 2477 + struct perf_cpu cpu; 2478 + int i, err = -1; 2479 + 2480 + if (symbol_conf.parallelism_list_str == NULL) 2481 + return 0; 2482 + 2483 + map = perf_cpu_map__new(symbol_conf.parallelism_list_str); 2484 + if (map == NULL) { 2485 + pr_err("failed to parse parallelism filter list\n"); 2486 + return -1; 2487 + } 2488 + 2489 + bitmap_fill(symbol_conf.parallelism_filter, MAX_NR_CPUS + 1); 2490 + perf_cpu_map__for_each_cpu(cpu, i, map) { 2491 + if (cpu.cpu <= 0 || cpu.cpu > MAX_NR_CPUS) { 2492 + pr_err("Requested parallelism level %d is invalid.\n", cpu.cpu); 2493 + goto out_delete_map; 2494 + } 2495 + __clear_bit(cpu.cpu, symbol_conf.parallelism_filter); 2496 + } 2497 + 2498 + err = 0; 2499 + out_delete_map: 2500 + perf_cpu_map__put(map); 2501 + return err; 2502 + } 2503 + 2475 2504 int symbol__init(struct perf_env *env) 2476 2505 { 2477 2506 const char *symfs; ··· 2520 2489 pr_err("'.' is the only non valid --field-separator argument\n"); 2521 2490 return -1; 2522 2491 } 2492 + 2493 + if (setup_parallelism_bitmap()) 2494 + return -1; 2523 2495 2524 2496 if (setup_list(&symbol_conf.dso_list, 2525 2497 symbol_conf.dso_list_str, "dso") < 0)
+4
tools/perf/util/symbol_conf.h
··· 3 3 #define __PERF_SYMBOL_CONF 1 4 4 5 5 #include <stdbool.h> 6 + #include <linux/bitmap.h> 7 + #include "perf.h" 6 8 7 9 struct strlist; 8 10 struct intlist; ··· 64 62 *pid_list_str, 65 63 *tid_list_str, 66 64 *sym_list_str, 65 + *parallelism_list_str, 67 66 *col_width_list_str, 68 67 *bt_stop_list_str; 69 68 const char *addr2line_path; ··· 85 82 int pad_output_len_dso; 86 83 int group_sort_idx; 87 84 int addr_range; 85 + DECLARE_BITMAP(parallelism_filter, MAX_NR_CPUS + 1); 88 86 }; 89 87 90 88 extern struct symbol_conf symbol_conf;