Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf tools: Add MEM_TOPOLOGY feature to perf data file

Add the MEM_TOPOLOGY feature to the perf data file.
It carries the physical memory map and its
node assignments.

The format of data in MEM_TOPOLOGY is as follows:

0 - version | for future changes
8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes
16 - count | number of nodes

For each node we store a map of the physical memory
indexes that belong to it:

32 - node id | node index
40 - size | size of bitmap
48 - bitmap | bitmap of memory indexes that belong to the node
| /sys/devices/system/node/node<NODE>/memory<INDEX>

The MEM_TOPOLOGY data can be displayed with the following
report command:

$ perf report --header-only -I
...
# memory nodes (nr 1, block size 0x8000000):
# 0 [7G]: 0-23,32-69

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180307155020.32613-8-jolsa@kernel.org
[ Rename 'index' to 'idx', as the name 'index' breaks the build on rhel5, 6 and other systems where it is already used by glibc headers ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Jiri Olsa and committed by
Arnaldo Carvalho de Melo
e2091ced 5cedb413

+316 -1
+1 -1
tools/include/linux/bitmap.h
··· 98 98 99 99 /** 100 100 * bitmap_alloc - Allocate bitmap 101 - * @nr: Bit to set 101 + * @nbits: Number of bits 102 102 */ 103 103 static inline unsigned long *bitmap_alloc(int nbits) 104 104 {
+9
tools/perf/util/env.h
··· 27 27 struct cpu_map *map; 28 28 }; 29 29 30 + struct memory_node { 31 + u64 node; 32 + u64 size; 33 + unsigned long *set; 34 + }; 35 + 30 36 struct perf_env { 31 37 char *hostname; 32 38 char *os_release; ··· 49 43 int nr_sibling_cores; 50 44 int nr_sibling_threads; 51 45 int nr_numa_nodes; 46 + int nr_memory_nodes; 52 47 int nr_pmu_mappings; 53 48 int nr_groups; 54 49 char *cmdline; ··· 61 54 struct cpu_cache_level *caches; 62 55 int caches_cnt; 63 56 struct numa_node *numa_nodes; 57 + struct memory_node *memory_nodes; 58 + unsigned long long memory_bsize; 64 59 }; 65 60 66 61 extern struct perf_env perf_env;
+305
tools/perf/util/header.c
··· 17 17 #include <sys/stat.h> 18 18 #include <sys/utsname.h> 19 19 #include <linux/time64.h> 20 + #include <dirent.h> 20 21 21 22 #include "evlist.h" 22 23 #include "evsel.h" ··· 38 37 #include "asm/bug.h" 39 38 #include "tool.h" 40 39 #include "time-utils.h" 40 + #include "units.h" 41 41 42 42 #include "sane_ctype.h" 43 43 ··· 131 129 if (!ff->buf) 132 130 return __do_write_fd(ff, buf, size); 133 131 return __do_write_buf(ff, buf, size); 132 + } 133 + 134 + /* Return: 0 if succeded, -ERR if failed. */ 135 + static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size) 136 + { 137 + u64 *p = (u64 *) set; 138 + int i, ret; 139 + 140 + ret = do_write(ff, &size, sizeof(size)); 141 + if (ret < 0) 142 + return ret; 143 + 144 + for (i = 0; (u64) i < BITS_TO_U64(size); i++) { 145 + ret = do_write(ff, p + i, sizeof(*p)); 146 + if (ret < 0) 147 + return ret; 148 + } 149 + 150 + return 0; 134 151 } 135 152 136 153 /* Return: 0 if succeded, -ERR if failed. */ ··· 262 241 263 242 free(buf); 264 243 return NULL; 244 + } 245 + 246 + /* Return: 0 if succeded, -ERR if failed. 
*/ 247 + static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize) 248 + { 249 + unsigned long *set; 250 + u64 size, *p; 251 + int i, ret; 252 + 253 + ret = do_read_u64(ff, &size); 254 + if (ret) 255 + return ret; 256 + 257 + set = bitmap_alloc(size); 258 + if (!set) 259 + return -ENOMEM; 260 + 261 + bitmap_zero(set, size); 262 + 263 + p = (u64 *) set; 264 + 265 + for (i = 0; (u64) i < BITS_TO_U64(size); i++) { 266 + ret = do_read_u64(ff, p + i); 267 + if (ret < 0) { 268 + free(set); 269 + return ret; 270 + } 271 + } 272 + 273 + *pset = set; 274 + *psize = size; 275 + return 0; 265 276 } 266 277 267 278 static int write_tracing_data(struct feat_fd *ff, ··· 1249 1196 sizeof(evlist->last_sample_time)); 1250 1197 } 1251 1198 1199 + 1200 + static int memory_node__read(struct memory_node *n, unsigned long idx) 1201 + { 1202 + unsigned int phys, size = 0; 1203 + char path[PATH_MAX]; 1204 + struct dirent *ent; 1205 + DIR *dir; 1206 + 1207 + #define for_each_memory(mem, dir) \ 1208 + while ((ent = readdir(dir))) \ 1209 + if (strcmp(ent->d_name, ".") && \ 1210 + strcmp(ent->d_name, "..") && \ 1211 + sscanf(ent->d_name, "memory%u", &mem) == 1) 1212 + 1213 + scnprintf(path, PATH_MAX, 1214 + "%s/devices/system/node/node%lu", 1215 + sysfs__mountpoint(), idx); 1216 + 1217 + dir = opendir(path); 1218 + if (!dir) { 1219 + pr_warning("failed: cant' open memory sysfs data\n"); 1220 + return -1; 1221 + } 1222 + 1223 + for_each_memory(phys, dir) { 1224 + size = max(phys, size); 1225 + } 1226 + 1227 + size++; 1228 + 1229 + n->set = bitmap_alloc(size); 1230 + if (!n->set) { 1231 + closedir(dir); 1232 + return -ENOMEM; 1233 + } 1234 + 1235 + bitmap_zero(n->set, size); 1236 + n->node = idx; 1237 + n->size = size; 1238 + 1239 + rewinddir(dir); 1240 + 1241 + for_each_memory(phys, dir) { 1242 + set_bit(phys, n->set); 1243 + } 1244 + 1245 + closedir(dir); 1246 + return 0; 1247 + } 1248 + 1249 + static int memory_node__sort(const void *a, const void *b) 1250 + { 1251 + 
const struct memory_node *na = a; 1252 + const struct memory_node *nb = b; 1253 + 1254 + return na->node - nb->node; 1255 + } 1256 + 1257 + static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp) 1258 + { 1259 + char path[PATH_MAX]; 1260 + struct dirent *ent; 1261 + DIR *dir; 1262 + u64 cnt = 0; 1263 + int ret = 0; 1264 + 1265 + scnprintf(path, PATH_MAX, "%s/devices/system/node/", 1266 + sysfs__mountpoint()); 1267 + 1268 + dir = opendir(path); 1269 + if (!dir) { 1270 + pr_warning("failed: can't open node sysfs data\n"); 1271 + return -1; 1272 + } 1273 + 1274 + while (!ret && (ent = readdir(dir))) { 1275 + unsigned int idx; 1276 + int r; 1277 + 1278 + if (!strcmp(ent->d_name, ".") || 1279 + !strcmp(ent->d_name, "..")) 1280 + continue; 1281 + 1282 + r = sscanf(ent->d_name, "node%u", &idx); 1283 + if (r != 1) 1284 + continue; 1285 + 1286 + if (WARN_ONCE(cnt >= size, 1287 + "failed to write MEM_TOPOLOGY, way too many nodes\n")) 1288 + return -1; 1289 + 1290 + ret = memory_node__read(&nodes[cnt++], idx); 1291 + } 1292 + 1293 + *cntp = cnt; 1294 + closedir(dir); 1295 + 1296 + if (!ret) 1297 + qsort(nodes, cnt, sizeof(nodes[0]), memory_node__sort); 1298 + 1299 + return ret; 1300 + } 1301 + 1302 + #define MAX_MEMORY_NODES 2000 1303 + 1304 + /* 1305 + * The MEM_TOPOLOGY holds physical memory map for every 1306 + * node in system. 
The format of data is as follows: 1307 + * 1308 + * 0 - version | for future changes 1309 + * 8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes 1310 + * 16 - count | number of nodes 1311 + * 1312 + * For each node we store map of physical indexes for 1313 + * each node: 1314 + * 1315 + * 32 - node id | node index 1316 + * 40 - size | size of bitmap 1317 + * 48 - bitmap | bitmap of memory indexes that belongs to node 1318 + */ 1319 + static int write_mem_topology(struct feat_fd *ff __maybe_unused, 1320 + struct perf_evlist *evlist __maybe_unused) 1321 + { 1322 + static struct memory_node nodes[MAX_MEMORY_NODES]; 1323 + u64 bsize, version = 1, i, nr; 1324 + int ret; 1325 + 1326 + ret = sysfs__read_xll("devices/system/memory/block_size_bytes", 1327 + (unsigned long long *) &bsize); 1328 + if (ret) 1329 + return ret; 1330 + 1331 + ret = build_mem_topology(&nodes[0], MAX_MEMORY_NODES, &nr); 1332 + if (ret) 1333 + return ret; 1334 + 1335 + ret = do_write(ff, &version, sizeof(version)); 1336 + if (ret < 0) 1337 + goto out; 1338 + 1339 + ret = do_write(ff, &bsize, sizeof(bsize)); 1340 + if (ret < 0) 1341 + goto out; 1342 + 1343 + ret = do_write(ff, &nr, sizeof(nr)); 1344 + if (ret < 0) 1345 + goto out; 1346 + 1347 + for (i = 0; i < nr; i++) { 1348 + struct memory_node *n = &nodes[i]; 1349 + 1350 + #define _W(v) \ 1351 + ret = do_write(ff, &n->v, sizeof(n->v)); \ 1352 + if (ret < 0) \ 1353 + goto out; 1354 + 1355 + _W(node) 1356 + _W(size) 1357 + 1358 + #undef _W 1359 + 1360 + ret = do_write_bitmap(ff, n->set, n->size); 1361 + if (ret < 0) 1362 + goto out; 1363 + } 1364 + 1365 + out: 1366 + return ret; 1367 + } 1368 + 1252 1369 static void print_hostname(struct feat_fd *ff, FILE *fp) 1253 1370 { 1254 1371 fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); ··· 1764 1541 session->evlist->first_sample_time) / NSEC_PER_MSEC; 1765 1542 1766 1543 fprintf(fp, "# sample duration : %10.3f ms\n", d); 1544 + } 1545 + 1546 + static void memory_node__fprintf(struct 
memory_node *n, 1547 + unsigned long long bsize, FILE *fp) 1548 + { 1549 + char buf_map[100], buf_size[50]; 1550 + unsigned long long size; 1551 + 1552 + size = bsize * bitmap_weight(n->set, n->size); 1553 + unit_number__scnprintf(buf_size, 50, size); 1554 + 1555 + bitmap_scnprintf(n->set, n->size, buf_map, 100); 1556 + fprintf(fp, "# %3" PRIu64 " [%s]: %s\n", n->node, buf_size, buf_map); 1557 + } 1558 + 1559 + static void print_mem_topology(struct feat_fd *ff, FILE *fp) 1560 + { 1561 + struct memory_node *nodes; 1562 + int i, nr; 1563 + 1564 + nodes = ff->ph->env.memory_nodes; 1565 + nr = ff->ph->env.nr_memory_nodes; 1566 + 1567 + fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n", 1568 + nr, ff->ph->env.memory_bsize); 1569 + 1570 + for (i = 0; i < nr; i++) { 1571 + memory_node__fprintf(&nodes[i], ff->ph->env.memory_bsize, fp); 1572 + } 1767 1573 } 1768 1574 1769 1575 static int __event_process_build_id(struct build_id_event *bev, ··· 2457 2205 return 0; 2458 2206 } 2459 2207 2208 + static int process_mem_topology(struct feat_fd *ff, 2209 + void *data __maybe_unused) 2210 + { 2211 + struct memory_node *nodes; 2212 + u64 version, i, nr, bsize; 2213 + int ret = -1; 2214 + 2215 + if (do_read_u64(ff, &version)) 2216 + return -1; 2217 + 2218 + if (version != 1) 2219 + return -1; 2220 + 2221 + if (do_read_u64(ff, &bsize)) 2222 + return -1; 2223 + 2224 + if (do_read_u64(ff, &nr)) 2225 + return -1; 2226 + 2227 + nodes = zalloc(sizeof(*nodes) * nr); 2228 + if (!nodes) 2229 + return -1; 2230 + 2231 + for (i = 0; i < nr; i++) { 2232 + struct memory_node n; 2233 + 2234 + #define _R(v) \ 2235 + if (do_read_u64(ff, &n.v)) \ 2236 + goto out; \ 2237 + 2238 + _R(node) 2239 + _R(size) 2240 + 2241 + #undef _R 2242 + 2243 + if (do_read_bitmap(ff, &n.set, &n.size)) 2244 + goto out; 2245 + 2246 + nodes[i] = n; 2247 + } 2248 + 2249 + ff->ph->env.memory_bsize = bsize; 2250 + ff->ph->env.memory_nodes = nodes; 2251 + ff->ph->env.nr_memory_nodes = nr; 2252 + ret = 0; 2253 + 2254 + 
out: 2255 + if (ret) 2256 + free(nodes); 2257 + return ret; 2258 + } 2259 + 2460 2260 struct feature_ops { 2461 2261 int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); 2462 2262 void (*print)(struct feat_fd *ff, FILE *fp); ··· 2567 2263 FEAT_OPN(STAT, stat, false), 2568 2264 FEAT_OPN(CACHE, cache, true), 2569 2265 FEAT_OPR(SAMPLE_TIME, sample_time, false), 2266 + FEAT_OPR(MEM_TOPOLOGY, mem_topology, true), 2570 2267 }; 2571 2268 2572 2269 struct header_print_data {
+1
tools/perf/util/header.h
··· 36 36 HEADER_STAT, 37 37 HEADER_CACHE, 38 38 HEADER_SAMPLE_TIME, 39 + HEADER_MEM_TOPOLOGY, 39 40 HEADER_LAST_FEATURE, 40 41 HEADER_FEAT_BITS = 256, 41 42 };