Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf mem: Add 'dtlb' output field

This is a breakdown of perf_mem_data_src.mem_dtlb values. It assumes
PMU drivers set the PERF_MEM_TLB_HIT bit together with the appropriate
level bit.

Having PERF_MEM_TLB_MISS set means that the lookup failed to find an
entry in any level of the TLB. For now, the PERF_MEM_TLB_{WK,OS} bits
are not used.

Also, it seems Intel machines don't precisely distinguish between L1 and
L2 hits, so I added ANY_HIT (printed as "L?-Hit") to handle that case.

$ perf mem report -F overhead,dtlb,dso --stdio
...
#           --- D-TLB ----
# Overhead   L?-Hit   Miss  Shared Object
# ........  ..............  .................
#
    67.03%    99.5%   0.5%  [unknown]
    31.23%    99.2%   0.8%  [kernel.kallsyms]
     1.08%    97.8%   2.2%  [i915]
     0.36%   100.0%   0.0%  [JIT] tid 6853
     0.12%   100.0%   0.0%  [drm]
     0.05%   100.0%   0.0%  [drm_kms_helper]
     0.05%   100.0%   0.0%  [ext4]
     0.02%   100.0%   0.0%  [aesni_intel]
     0.02%   100.0%   0.0%  [crc32c_intel]
     0.02%   100.0%   0.0%  [dm_crypt]
...

Committer testing:

# perf report --header | grep cpudesc
# cpudesc : AMD Ryzen 9 9950X3D 16-Core Processor
# perf mem report -F overhead,dtlb,dso --stdio | head -20
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 2K of event 'cycles:P'
# Total weight : 2637
# Sort order : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc
#
#           ---------- D-TLB -----------
# Overhead   L1-Hit L2-Hit   Miss  Other  Shared Object
# ........  ............................  .................................
#
    77.47%    18.4%   0.1%   0.6%  80.9%  [kernel.kallsyms]
     5.61%    36.5%   0.7%   1.4%  61.5%  libxul.so
     2.77%    39.7%   0.0%  12.3%  47.9%  libc.so.6
     2.01%    34.0%   1.9%   1.9%  62.3%  libglib-2.0.so.0.8400.1
     1.93%    31.4%   2.0%   2.0%  64.7%  [amdgpu]
     1.63%    48.8%   0.0%   0.0%  51.2%  [JIT] tid 60168
     1.14%     3.3%   0.0%   0.0%  96.7%  [vdso]
#

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20250430205548.789750-12-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Namhyung Kim and committed by
Arnaldo Carvalho de Melo
f7458176 5e424a01

+46
+3
tools/perf/ui/browsers/hists.c
··· 1288 1288 __HPP_COLOR_MEM_STAT_FN(cache, CACHE) 1289 1289 __HPP_COLOR_MEM_STAT_FN(memory, MEMORY) 1290 1290 __HPP_COLOR_MEM_STAT_FN(snoop, SNOOP) 1291 + __HPP_COLOR_MEM_STAT_FN(dtlb, DTLB) 1291 1292 1292 1293 #undef __HPP_COLOR_PERCENT_FN 1293 1294 #undef __HPP_COLOR_ACC_PERCENT_FN ··· 1320 1319 hist_browser__hpp_color_mem_stat_memory; 1321 1320 perf_hpp__format[PERF_HPP__MEM_STAT_SNOOP].color = 1322 1321 hist_browser__hpp_color_mem_stat_snoop; 1322 + perf_hpp__format[PERF_HPP__MEM_STAT_DTLB].color = 1323 + hist_browser__hpp_color_mem_stat_dtlb; 1323 1324 1324 1325 res_sample_init(); 1325 1326 }
+5
tools/perf/ui/hist.c
··· 354 354 return PERF_MEM_STAT_MEMORY; 355 355 case PERF_HPP__MEM_STAT_SNOOP: 356 356 return PERF_MEM_STAT_SNOOP; 357 + case PERF_HPP__MEM_STAT_DTLB: 358 + return PERF_MEM_STAT_DTLB; 357 359 default: 358 360 break; 359 361 } ··· 655 653 HPP_MEM_STAT_FNS(cache, CACHE) 656 654 HPP_MEM_STAT_FNS(memory, MEMORY) 657 655 HPP_MEM_STAT_FNS(snoop, SNOOP) 656 + HPP_MEM_STAT_FNS(dtlb, DTLB) 658 657 659 658 static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused, 660 659 struct hist_entry *a __maybe_unused, ··· 763 760 HPP__MEM_STAT_PRINT_FNS("Cache", cache, CACHE), 764 761 HPP__MEM_STAT_PRINT_FNS("Memory", memory, MEMORY), 765 762 HPP__MEM_STAT_PRINT_FNS("Snoop", snoop, SNOOP), 763 + HPP__MEM_STAT_PRINT_FNS("D-TLB", dtlb, DTLB), 766 764 }; 767 765 768 766 struct perf_hpp_list perf_hpp_list = { ··· 1122 1118 case PERF_HPP__MEM_STAT_CACHE: 1123 1119 case PERF_HPP__MEM_STAT_MEMORY: 1124 1120 case PERF_HPP__MEM_STAT_SNOOP: 1121 + case PERF_HPP__MEM_STAT_DTLB: 1125 1122 fmt->len = MEM_STAT_LEN * MEM_STAT_PRINT_LEN; 1126 1123 break; 1127 1124
+1
tools/perf/util/hist.h
··· 592 592 PERF_HPP__MEM_STAT_CACHE, 593 593 PERF_HPP__MEM_STAT_MEMORY, 594 594 PERF_HPP__MEM_STAT_SNOOP, 595 + PERF_HPP__MEM_STAT_DTLB, 595 596 596 597 PERF_HPP__MAX_INDEX 597 598 };
+27
tools/perf/util/mem-events.c
··· 868 868 default: 869 869 return MEM_STAT_SNOOP_OTHER; 870 870 } 871 + case PERF_MEM_STAT_DTLB: 872 + switch (src.mem_dtlb) { 873 + case PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT: 874 + return MEM_STAT_DTLB_L1_HIT; 875 + case PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT: 876 + return MEM_STAT_DTLB_L2_HIT; 877 + case PERF_MEM_TLB_L1 | PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT: 878 + return MEM_STAT_DTLB_ANY_HIT; 879 + default: 880 + if (src.mem_dtlb & PERF_MEM_TLB_MISS) 881 + return MEM_STAT_DTLB_MISS; 882 + return MEM_STAT_DTLB_OTHER; 883 + } 871 884 default: 872 885 break; 873 886 } ··· 952 939 case MEM_STAT_SNOOP_MISS: 953 940 return "Miss"; 954 941 case MEM_STAT_SNOOP_OTHER: 942 + default: 943 + return "Other"; 944 + } 945 + case PERF_MEM_STAT_DTLB: 946 + switch (idx) { 947 + case MEM_STAT_DTLB_L1_HIT: 948 + return "L1-Hit"; 949 + case MEM_STAT_DTLB_L2_HIT: 950 + return "L2-Hit"; 951 + case MEM_STAT_DTLB_ANY_HIT: 952 + return "L?-Hit"; 953 + case MEM_STAT_DTLB_MISS: 954 + return "Miss"; 955 + case MEM_STAT_DTLB_OTHER: 955 956 default: 956 957 return "Other"; 957 958 }
+9
tools/perf/util/mem-events.h
··· 94 94 PERF_MEM_STAT_CACHE, 95 95 PERF_MEM_STAT_MEMORY, 96 96 PERF_MEM_STAT_SNOOP, 97 + PERF_MEM_STAT_DTLB, 97 98 }; 98 99 99 100 #define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 digits + 1 percent sign */ ··· 133 132 MEM_STAT_SNOOP_HITM, 134 133 MEM_STAT_SNOOP_MISS, 135 134 MEM_STAT_SNOOP_OTHER, 135 + }; 136 + 137 + enum mem_stat_dtlb { 138 + MEM_STAT_DTLB_L1_HIT, 139 + MEM_STAT_DTLB_L2_HIT, 140 + MEM_STAT_DTLB_ANY_HIT, 141 + MEM_STAT_DTLB_MISS, 142 + MEM_STAT_DTLB_OTHER, 136 143 }; 137 144 138 145 int mem_stat_index(const enum mem_stat_type mst, const u64 data_src);
+1
tools/perf/util/sort.c
··· 2627 2627 DIM_MEM(PERF_HPP__MEM_STAT_CACHE, "cache"), 2628 2628 DIM_MEM(PERF_HPP__MEM_STAT_MEMORY, "memory"), 2629 2629 DIM_MEM(PERF_HPP__MEM_STAT_SNOOP, "snoop"), 2630 + DIM_MEM(PERF_HPP__MEM_STAT_DTLB, "dtlb"), 2630 2631 }; 2631 2632 2632 2633 #undef DIM_MEM