Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf tools: Support data block and addr block

Two new data source fields, which indicate the reasons a load
instruction was blocked, are introduced on the Intel Sapphire Rapids server. The
fields can be used for memory profiling.

Add a new sort function, SORT_MEM_BLOCKED, for the two fields.

On previous platforms, or when the block reason is unknown, print "N/A"
for the block reason.

Add blocked as a default mem sort key for perf report and perf mem
report.

Committer testing:

So on machines without this capability we get "N/A" filling the new "Blocked"
column:

$ perf mem record ls
arch certs CREDITS Documentation include ipc Kconfig lib MAINTAINERS mm samples security usr block
COPYING crypto drivers fs init Kbuild kernel LICENSES Makefile net README scripts sound tools
virt
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.008 MB perf.data (17 samples) ]
$
$ perf mem report --stdio
# To display the perf.data header info, please use --header/--header-only options.
#
# Total Lost Samples: 0
#
# Samples: 6 of event 'cpu/mem-loads,ldlat=30/Pu'
# Total weight : 1381
# Sort order : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked
#
# Overhead Samples Local Weight Memory access Symbol Shared Object Data Symbol Data Object Snoop TLB access Locked Blocked
# ........ ....... ............ .................... ....................... ............. ...................... ............ ..... ............ ...... .......
#
32.87% 1 454 Local RAM or RAM hit [.] _dl_relocate_object ld-2.31.so [.] 0x00007fe91cef3078 libc-2.31.so Hit L1 or L2 hit No N/A
25.56% 1 353 LFB or LFB hit [.] strcmp ld-2.31.so [.] 0x00005586973855ca ls None L1 or L2 hit No N/A
22.59% 1 312 LFB or LFB hit [.] _dl_cache_libcmp ld-2.31.so [.] 0x00007fe91d0e3b18 ld.so.cache None L1 or L2 hit No N/A
8.47% 1 117 LFB or LFB hit [.] _dl_relocate_object ld-2.31.so [.] 0x00007fe91ceee570 libc-2.31.so None L1 or L2 hit No N/A
6.88% 1 95 LFB or LFB hit [.] _dl_relocate_object ld-2.31.so [.] 0x00007fe91ceed490 libc-2.31.so None L1 or L2 hit No N/A
3.62% 1 50 LFB or LFB hit [.] _dl_cache_libcmp ld-2.31.so [.] 0x00007fe91d0ebe60 ld.so.cache None L1 or L2 hit No N/A

# Samples: 11 of event 'cpu/mem-stores/Pu'
# Total weight : 11
# Sort order : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked
#
# Overhead Samples Local Weight Memory access Symbol Shared Object Data Symbol Data Object Snoop TLB access Locked Blocked
# ........ ....... ............ ............. ....................... ............. ...................... ........... ..... .......... ...... .......
#
9.09% 1 0 L1 hit [.] __strcoll_l libc-2.31.so [.] 0x00007fffe5648fc8 [stack] N/A N/A N/A N/A
9.09% 1 0 L1 hit [.] _dl_lookup_symbol_x ld-2.31.so [.] 0x00007fffe56490b8 [stack] N/A N/A N/A N/A
9.09% 1 0 L1 hit [.] _dl_name_match_p ld-2.31.so [.] 0x00007fffe56487d8 [stack] N/A N/A N/A N/A
9.09% 1 0 L1 hit [.] _dl_start ld-2.31.so [.] start_time+0x0 ld-2.31.so N/A N/A N/A N/A
9.09% 1 0 L1 hit [.] _dl_sysdep_start ld-2.31.so [.] 0x00007fffe56494b8 [stack] N/A N/A N/A N/A
9.09% 1 0 L1 hit [.] do_lookup_x ld-2.31.so [.] 0x00007fffe5648ff8 [stack] N/A N/A N/A N/A
9.09% 1 0 L1 hit [.] do_lookup_x ld-2.31.so [.] 0x00007fffe5649064 [stack] N/A N/A N/A N/A
9.09% 1 0 L1 hit [.] do_lookup_x ld-2.31.so [.] 0x00007fffe5649130 [stack] N/A N/A N/A N/A
9.09% 1 0 L1 miss [.] _dl_start ld-2.31.so [.] _rtld_global+0xaf8 ld-2.31.so N/A N/A N/A N/A
9.09% 1 0 L1 miss [.] _dl_start ld-2.31.so [.] _rtld_global+0xc28 ld-2.31.so N/A N/A N/A N/A
9.09% 1 0 L1 miss [.] _dl_start ld-2.31.so [.] 0x00007fffe56495b8 [stack] N/A N/A N/A N/A

# (Tip: Show user configuration overrides: perf config --user --list)
$

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jin Yao <yao.jin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/1612296553-21962-4-git-send-email-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Kan Liang and committed by
Arnaldo Carvalho de Melo
a054c298 2a57d408

+70 -4
+3 -2
tools/perf/Documentation/perf-report.txt
··· 140 140 141 141 If the --mem-mode option is used, the following sort keys are also available 142 142 (incompatible with --branch-stack): 143 - symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline. 143 + symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline, blocked. 144 144 145 145 - symbol_daddr: name of data symbol being executed on at the time of sample 146 146 - dso_daddr: name of library or module containing the data being executed ··· 152 152 - dcacheline: the cacheline the data address is on at the time of the sample 153 153 - phys_daddr: physical address of data being executed on at the time of sample 154 154 - data_page_size: the data page size of data being executed on at the time of sample 155 + - blocked: reason of blocked load access for the data at the time of the sample 155 156 156 157 And the default sort keys are changed to local_weight, mem, sym, dso, 157 - symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'. 158 + symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, see '--mem-mode'. 158 159 159 160 If the data file has tracepoint event(s), following (dynamic) sort keys 160 161 are also available:
+1 -1
tools/perf/builtin-mem.c
··· 309 309 "dso_daddr,tlb,locked"); 310 310 } else if (has_extra_options) { 311 311 strcpy(sort, "--sort=local_weight,mem,sym,dso,symbol_daddr," 312 - "dso_daddr,snoop,tlb,locked"); 312 + "dso_daddr,snoop,tlb,locked,blocked"); 313 313 } else 314 314 return NULL; 315 315
+1
tools/perf/util/hist.c
··· 208 208 hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3); 209 209 hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); 210 210 hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); 211 + hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10); 211 212 if (symbol_conf.nanosecs) 212 213 hists__new_col_len(hists, HISTC_TIME, 16); 213 214 else
+1
tools/perf/util/hist.h
··· 72 72 HISTC_SYM_SIZE, 73 73 HISTC_DSO_SIZE, 74 74 HISTC_SYMBOL_IPC, 75 + HISTC_MEM_BLOCKED, 75 76 HISTC_NR_COLS, /* Last entry */ 76 77 }; 77 78
+25
tools/perf/util/mem-events.c
··· 337 337 return l; 338 338 } 339 339 340 + int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 341 + { 342 + size_t l = 0; 343 + u64 mask = PERF_MEM_BLK_NA; 344 + 345 + sz -= 1; /* -1 for null termination */ 346 + out[0] = '\0'; 347 + 348 + if (mem_info) 349 + mask = mem_info->data_src.mem_blk; 350 + 351 + if (!mask || (mask & PERF_MEM_BLK_NA)) { 352 + l += scnprintf(out + l, sz - l, " N/A"); 353 + return l; 354 + } 355 + if (mask & PERF_MEM_BLK_DATA) 356 + l += scnprintf(out + l, sz - l, " Data"); 357 + if (mask & PERF_MEM_BLK_ADDR) 358 + l += scnprintf(out + l, sz - l, " Addr"); 359 + 360 + return l; 361 + } 362 + 340 363 int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info) 341 364 { 342 365 int i = 0; ··· 371 348 i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info); 372 349 i += scnprintf(out + i, sz - i, "|LCK "); 373 350 i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info); 351 + i += scnprintf(out + i, sz - i, "|BLK "); 352 + i += perf_mem__blk_scnprintf(out + i, sz - i, mem_info); 374 353 375 354 return i; 376 355 }
+1
tools/perf/util/mem-events.h
··· 49 49 int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info); 50 50 int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info); 51 51 int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); 52 + int perf_mem__blk_scnprintf(char *out, size_t sz, struct mem_info *mem_info); 52 53 53 54 int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info); 54 55
+37 -1
tools/perf/util/sort.c
··· 36 36 const char *parent_pattern = default_parent_pattern; 37 37 const char *default_sort_order = "comm,dso,symbol"; 38 38 const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; 39 - const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; 39 + const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked"; 40 40 const char default_top_sort_order[] = "dso,symbol"; 41 41 const char default_diff_sort_order[] = "dso,symbol"; 42 42 const char default_tracepoint_sort_order[] = "trace"; ··· 1422 1422 }; 1423 1423 1424 1424 static int64_t 1425 + sort__blocked_cmp(struct hist_entry *left, struct hist_entry *right) 1426 + { 1427 + union perf_mem_data_src data_src_l; 1428 + union perf_mem_data_src data_src_r; 1429 + 1430 + if (left->mem_info) 1431 + data_src_l = left->mem_info->data_src; 1432 + else 1433 + data_src_l.mem_blk = PERF_MEM_BLK_NA; 1434 + 1435 + if (right->mem_info) 1436 + data_src_r = right->mem_info->data_src; 1437 + else 1438 + data_src_r.mem_blk = PERF_MEM_BLK_NA; 1439 + 1440 + return (int64_t)(data_src_r.mem_blk - data_src_l.mem_blk); 1441 + } 1442 + 1443 + static int hist_entry__blocked_snprintf(struct hist_entry *he, char *bf, 1444 + size_t size, unsigned int width) 1445 + { 1446 + char out[16]; 1447 + 1448 + perf_mem__blk_scnprintf(out, sizeof(out), he->mem_info); 1449 + return repsep_snprintf(bf, size, "%.*s", width, out); 1450 + } 1451 + 1452 + struct sort_entry sort_mem_blocked = { 1453 + .se_header = "Blocked", 1454 + .se_cmp = sort__blocked_cmp, 1455 + .se_snprintf = hist_entry__blocked_snprintf, 1456 + .se_width_idx = HISTC_MEM_BLOCKED, 1457 + }; 1458 + 1459 + static int64_t 1425 1460 sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right) 1426 1461 { 1427 1462 uint64_t l = 0, r = 0; ··· 1831 1796 DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline), 1832 1797 DIM(SORT_MEM_PHYS_DADDR, 
"phys_daddr", sort_mem_phys_daddr), 1833 1798 DIM(SORT_MEM_DATA_PAGE_SIZE, "data_page_size", sort_mem_data_page_size), 1799 + DIM(SORT_MEM_BLOCKED, "blocked", sort_mem_blocked), 1834 1800 }; 1835 1801 1836 1802 #undef DIM
+1
tools/perf/util/sort.h
··· 258 258 SORT_MEM_IADDR_SYMBOL, 259 259 SORT_MEM_PHYS_DADDR, 260 260 SORT_MEM_DATA_PAGE_SIZE, 261 + SORT_MEM_BLOCKED, 261 262 }; 262 263 263 264 /*