Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf header: Support CPU DOMAIN relation info

The '/proc/schedstat' file gives info about load balancing statistics
within a given domain.

It also contains the cpu_mask, which gives information about the sibling CPUs,
and, starting with schedstat version 17, the domain names.

Storing this information in the perf header will help tools like `perf sched
stats` perform better analysis.

Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
Tested-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Anubhav Shelat <ashelat@redhat.com>
Cc: Ben Gainey <ben.gainey@arm.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Chun-Tse Shao <ctshao@google.com>
Cc: David Vernet <void@manifault.com>
Cc: Dmitriy Vyukov <dvyukov@google.com>
Cc: Dr. David Alan Gilbert <linux@treblig.org>
Cc: Gautham Shenoy <gautham.shenoy@amd.com>
Cc: Graham Woodward <graham.woodward@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: K Prateek Nayak <kprateek.nayak@amd.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Madadi Vineeth Reddy <vineethr@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Santosh Shukla <santosh.shukla@amd.com>
Cc: Shrikanth Hegde <sshegde@linux.ibm.com>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Falcon <thomas.falcon@intel.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Yang Jihong <yangjihong@bytedance.com>
Cc: Yujie Liu <yujie.liu@intel.com>
Cc: Zhongqiu Han <quic_zhonhan@quicinc.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

authored by

Swapnil Sapkal and committed by
Arnaldo Carvalho de Melo
d40c68a4 cb68cba4

+396
+17
tools/perf/Documentation/perf.data-file-format.txt
··· 447 447 } [nr_pmu]; 448 448 }; 449 449 450 + HEADER_CPU_DOMAIN_INFO = 32, 451 + 452 + List of cpu-domain relation info. The format of the data is as below. 453 + 454 + struct domain_info { 455 + int domain; 456 + char dname[]; 457 + char cpumask[]; 458 + char cpulist[]; 459 + }; 460 + 461 + struct cpu_domain_info { 462 + int cpu; 463 + int nr_domains; 464 + struct domain_info domains[]; 465 + }; 466 + 450 467 other bits are reserved and should ignored for now 451 468 HEADER_FEAT_BITS = 256, 452 469
+1
tools/perf/builtin-inject.c
··· 2133 2133 case HEADER_CLOCK_DATA: 2134 2134 case HEADER_HYBRID_TOPOLOGY: 2135 2135 case HEADER_PMU_CAPS: 2136 + case HEADER_CPU_DOMAIN_INFO: 2136 2137 return true; 2137 2138 /* Information that can be updated */ 2138 2139 case HEADER_BUILD_ID:
+29
tools/perf/util/env.c
··· 216 216 } 217 217 #endif // HAVE_LIBBPF_SUPPORT 218 218 219 + void free_cpu_domain_info(struct cpu_domain_map **cd_map, u32 schedstat_version, u32 nr) 220 + { 221 + if (!cd_map) 222 + return; 223 + 224 + for (u32 i = 0; i < nr; i++) { 225 + if (!cd_map[i]) 226 + continue; 227 + 228 + for (u32 j = 0; j < cd_map[i]->nr_domains; j++) { 229 + struct domain_info *d_info = cd_map[i]->domains[j]; 230 + 231 + if (!d_info) 232 + continue; 233 + 234 + if (schedstat_version >= 17) 235 + zfree(&d_info->dname); 236 + 237 + zfree(&d_info->cpumask); 238 + zfree(&d_info->cpulist); 239 + zfree(&d_info); 240 + } 241 + zfree(&cd_map[i]->domains); 242 + zfree(&cd_map[i]); 243 + } 244 + zfree(&cd_map); 245 + } 246 + 219 247 void perf_env__exit(struct perf_env *env) 220 248 { 221 249 int i, j; ··· 293 265 zfree(&env->pmu_caps[i].pmu_name); 294 266 } 295 267 zfree(&env->pmu_caps); 268 + free_cpu_domain_info(env->cpu_domain, env->schedstat_version, env->nr_cpus_avail); 296 269 } 297 270 298 271 void perf_env__init(struct perf_env *env)
+17
tools/perf/util/env.h
··· 54 54 char *pmu_name; 55 55 }; 56 56 57 + struct domain_info { 58 + u32 domain; 59 + char *dname; 60 + char *cpumask; 61 + char *cpulist; 62 + }; 63 + 64 + struct cpu_domain_map { 65 + u32 cpu; 66 + u32 nr_domains; 67 + struct domain_info **domains; 68 + }; 69 + 57 70 typedef const char *(arch_syscalls__strerrno_t)(int err); 58 71 59 72 struct perf_env { ··· 83 70 unsigned int max_branches; 84 71 unsigned int br_cntr_nr; 85 72 unsigned int br_cntr_width; 73 + unsigned int schedstat_version; 74 + unsigned int max_sched_domains; 86 75 int kernel_is_64_bit; 87 76 88 77 int nr_cmdline; ··· 107 92 char **cpu_pmu_caps; 108 93 struct cpu_topology_map *cpu; 109 94 struct cpu_cache_level *caches; 95 + struct cpu_domain_map **cpu_domain; 110 96 int caches_cnt; 111 97 u32 comp_ratio; 112 98 u32 comp_ver; ··· 167 151 struct btf_node; 168 152 169 153 int perf_env__read_core_pmu_caps(struct perf_env *env); 154 + void free_cpu_domain_info(struct cpu_domain_map **cd_map, u32 schedstat_version, u32 nr); 170 155 void perf_env__exit(struct perf_env *env); 171 156 172 157 int perf_env__kernel_is_64_bit(struct perf_env *env);
+286
tools/perf/util/header.c
··· 1614 1614 return 0; 1615 1615 } 1616 1616 1617 + static struct cpu_domain_map **build_cpu_domain_map(u32 *schedstat_version, u32 *max_sched_domains, 1618 + u32 nr) 1619 + { 1620 + struct domain_info *domain_info; 1621 + struct cpu_domain_map **cd_map; 1622 + char dname[16], cpumask[256]; 1623 + char cpulist[1024]; 1624 + char *line = NULL; 1625 + u32 cpu, domain; 1626 + u32 dcount = 0; 1627 + size_t len; 1628 + FILE *fp; 1629 + 1630 + fp = fopen("/proc/schedstat", "r"); 1631 + if (!fp) { 1632 + pr_err("Failed to open /proc/schedstat\n"); 1633 + return NULL; 1634 + } 1635 + 1636 + cd_map = zalloc(sizeof(*cd_map) * nr); 1637 + if (!cd_map) 1638 + goto out; 1639 + 1640 + while (getline(&line, &len, fp) > 0) { 1641 + int retval; 1642 + 1643 + if (strncmp(line, "version", 7) == 0) { 1644 + retval = sscanf(line, "version %d\n", schedstat_version); 1645 + if (retval != 1) 1646 + continue; 1647 + 1648 + } else if (strncmp(line, "cpu", 3) == 0) { 1649 + retval = sscanf(line, "cpu%u %*s", &cpu); 1650 + if (retval == 1) { 1651 + cd_map[cpu] = zalloc(sizeof(*cd_map[cpu])); 1652 + if (!cd_map[cpu]) 1653 + goto out_free_line; 1654 + cd_map[cpu]->cpu = cpu; 1655 + } else 1656 + continue; 1657 + 1658 + dcount = 0; 1659 + } else if (strncmp(line, "domain", 6) == 0) { 1660 + struct domain_info **temp_domains; 1661 + 1662 + dcount++; 1663 + temp_domains = realloc(cd_map[cpu]->domains, dcount * sizeof(domain_info)); 1664 + if (!temp_domains) 1665 + goto out_free_line; 1666 + else 1667 + cd_map[cpu]->domains = temp_domains; 1668 + 1669 + domain_info = zalloc(sizeof(*domain_info)); 1670 + if (!domain_info) 1671 + goto out_free_line; 1672 + 1673 + cd_map[cpu]->domains[dcount - 1] = domain_info; 1674 + 1675 + if (*schedstat_version >= 17) { 1676 + retval = sscanf(line, "domain%u %s %s %*s", &domain, dname, 1677 + cpumask); 1678 + if (retval != 3) 1679 + continue; 1680 + 1681 + domain_info->dname = strdup(dname); 1682 + if (!domain_info->dname) 1683 + goto out_free_line; 1684 + } else 
{ 1685 + retval = sscanf(line, "domain%u %s %*s", &domain, cpumask); 1686 + if (retval != 2) 1687 + continue; 1688 + } 1689 + 1690 + domain_info->domain = domain; 1691 + if (domain > *max_sched_domains) 1692 + *max_sched_domains = domain; 1693 + 1694 + domain_info->cpumask = strdup(cpumask); 1695 + if (!domain_info->cpumask) 1696 + goto out_free_line; 1697 + 1698 + cpumask_to_cpulist(cpumask, cpulist); 1699 + domain_info->cpulist = strdup(cpulist); 1700 + if (!domain_info->cpulist) 1701 + goto out_free_line; 1702 + 1703 + cd_map[cpu]->nr_domains = dcount; 1704 + } 1705 + } 1706 + 1707 + out_free_line: 1708 + free(line); 1709 + out: 1710 + fclose(fp); 1711 + return cd_map; 1712 + } 1713 + 1714 + static int write_cpu_domain_info(struct feat_fd *ff, 1715 + struct evlist *evlist __maybe_unused) 1716 + { 1717 + u32 max_sched_domains = 0, schedstat_version = 0; 1718 + struct cpu_domain_map **cd_map; 1719 + u32 i, j, nr, ret; 1720 + 1721 + nr = cpu__max_present_cpu().cpu; 1722 + 1723 + cd_map = build_cpu_domain_map(&schedstat_version, &max_sched_domains, nr); 1724 + if (!cd_map) 1725 + return -1; 1726 + 1727 + ret = do_write(ff, &schedstat_version, sizeof(u32)); 1728 + if (ret < 0) 1729 + goto out; 1730 + 1731 + max_sched_domains += 1; 1732 + ret = do_write(ff, &max_sched_domains, sizeof(u32)); 1733 + if (ret < 0) 1734 + goto out; 1735 + 1736 + for (i = 0; i < nr; i++) { 1737 + if (!cd_map[i]) 1738 + continue; 1739 + 1740 + ret = do_write(ff, &cd_map[i]->cpu, sizeof(u32)); 1741 + if (ret < 0) 1742 + goto out; 1743 + 1744 + ret = do_write(ff, &cd_map[i]->nr_domains, sizeof(u32)); 1745 + if (ret < 0) 1746 + goto out; 1747 + 1748 + for (j = 0; j < cd_map[i]->nr_domains; j++) { 1749 + ret = do_write(ff, &cd_map[i]->domains[j]->domain, sizeof(u32)); 1750 + if (ret < 0) 1751 + goto out; 1752 + if (schedstat_version >= 17) { 1753 + ret = do_write_string(ff, cd_map[i]->domains[j]->dname); 1754 + if (ret < 0) 1755 + goto out; 1756 + } 1757 + 1758 + ret = do_write_string(ff, 
cd_map[i]->domains[j]->cpumask); 1759 + if (ret < 0) 1760 + goto out; 1761 + 1762 + ret = do_write_string(ff, cd_map[i]->domains[j]->cpulist); 1763 + if (ret < 0) 1764 + goto out; 1765 + } 1766 + } 1767 + 1768 + out: 1769 + free_cpu_domain_info(cd_map, schedstat_version, nr); 1770 + return ret; 1771 + } 1772 + 1617 1773 static void print_hostname(struct feat_fd *ff, FILE *fp) 1618 1774 { 1619 1775 fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); ··· 2400 2244 2401 2245 for (i = 0; i < nr; i++) { 2402 2246 memory_node__fprintf(&nodes[i], env->memory_bsize, fp); 2247 + } 2248 + } 2249 + 2250 + static void print_cpu_domain_info(struct feat_fd *ff, FILE *fp) 2251 + { 2252 + struct cpu_domain_map **cd_map = ff->ph->env.cpu_domain; 2253 + u32 nr = ff->ph->env.nr_cpus_avail; 2254 + struct domain_info *d_info; 2255 + u32 i, j; 2256 + 2257 + fprintf(fp, "# schedstat version : %u\n", ff->ph->env.schedstat_version); 2258 + fprintf(fp, "# Maximum sched domains : %u\n", ff->ph->env.max_sched_domains); 2259 + 2260 + for (i = 0; i < nr; i++) { 2261 + if (!cd_map[i]) 2262 + continue; 2263 + 2264 + fprintf(fp, "# cpu : %u\n", cd_map[i]->cpu); 2265 + fprintf(fp, "# nr_domains : %u\n", cd_map[i]->nr_domains); 2266 + 2267 + for (j = 0; j < cd_map[i]->nr_domains; j++) { 2268 + d_info = cd_map[i]->domains[j]; 2269 + if (!d_info) 2270 + continue; 2271 + 2272 + fprintf(fp, "# Domain : %u\n", d_info->domain); 2273 + 2274 + if (ff->ph->env.schedstat_version >= 17) 2275 + fprintf(fp, "# Domain name : %s\n", d_info->dname); 2276 + 2277 + fprintf(fp, "# Domain cpu map : %s\n", d_info->cpumask); 2278 + fprintf(fp, "# Domain cpu list : %s\n", d_info->cpulist); 2279 + } 2403 2280 } 2404 2281 } 2405 2282 ··· 3577 3388 return ret; 3578 3389 } 3579 3390 3391 + static int process_cpu_domain_info(struct feat_fd *ff, void *data __maybe_unused) 3392 + { 3393 + u32 schedstat_version, max_sched_domains, cpu, domain, nr_domains; 3394 + struct perf_env *env = &ff->ph->env; 3395 + char *dname, 
*cpumask, *cpulist; 3396 + struct cpu_domain_map **cd_map; 3397 + struct domain_info *d_info; 3398 + u32 nra, nr, i, j; 3399 + int ret; 3400 + 3401 + nra = env->nr_cpus_avail; 3402 + nr = env->nr_cpus_online; 3403 + 3404 + cd_map = zalloc(sizeof(*cd_map) * nra); 3405 + if (!cd_map) 3406 + return -1; 3407 + 3408 + env->cpu_domain = cd_map; 3409 + 3410 + ret = do_read_u32(ff, &schedstat_version); 3411 + if (ret) 3412 + return ret; 3413 + 3414 + env->schedstat_version = schedstat_version; 3415 + 3416 + ret = do_read_u32(ff, &max_sched_domains); 3417 + if (ret) 3418 + return ret; 3419 + 3420 + env->max_sched_domains = max_sched_domains; 3421 + 3422 + for (i = 0; i < nr; i++) { 3423 + if (do_read_u32(ff, &cpu)) 3424 + return -1; 3425 + 3426 + cd_map[cpu] = zalloc(sizeof(*cd_map[cpu])); 3427 + if (!cd_map[cpu]) 3428 + return -1; 3429 + 3430 + cd_map[cpu]->cpu = cpu; 3431 + 3432 + if (do_read_u32(ff, &nr_domains)) 3433 + return -1; 3434 + 3435 + cd_map[cpu]->nr_domains = nr_domains; 3436 + 3437 + cd_map[cpu]->domains = zalloc(sizeof(*d_info) * max_sched_domains); 3438 + if (!cd_map[cpu]->domains) 3439 + return -1; 3440 + 3441 + for (j = 0; j < nr_domains; j++) { 3442 + if (do_read_u32(ff, &domain)) 3443 + return -1; 3444 + 3445 + d_info = zalloc(sizeof(*d_info)); 3446 + if (!d_info) 3447 + return -1; 3448 + 3449 + cd_map[cpu]->domains[domain] = d_info; 3450 + d_info->domain = domain; 3451 + 3452 + if (schedstat_version >= 17) { 3453 + dname = do_read_string(ff); 3454 + if (!dname) 3455 + return -1; 3456 + 3457 + d_info->dname = zalloc(strlen(dname) + 1); 3458 + if (!d_info->dname) 3459 + return -1; 3460 + 3461 + d_info->dname = strdup(dname); 3462 + } 3463 + 3464 + cpumask = do_read_string(ff); 3465 + if (!cpumask) 3466 + return -1; 3467 + 3468 + d_info->cpumask = zalloc(strlen(cpumask) + 1); 3469 + if (!d_info->cpumask) 3470 + return -1; 3471 + d_info->cpumask = strdup(cpumask); 3472 + 3473 + cpulist = do_read_string(ff); 3474 + if (!cpulist) 3475 + return -1; 3476 + 
3477 + d_info->cpulist = zalloc(strlen(cpulist) + 1); 3478 + if (!d_info->cpulist) 3479 + return -1; 3480 + d_info->cpulist = strdup(cpulist); 3481 + } 3482 + } 3483 + 3484 + return ret; 3485 + } 3486 + 3580 3487 #define FEAT_OPR(n, func, __full_only) \ 3581 3488 [HEADER_##n] = { \ 3582 3489 .name = __stringify(n), \ ··· 3738 3453 FEAT_OPR(CLOCK_DATA, clock_data, false), 3739 3454 FEAT_OPN(HYBRID_TOPOLOGY, hybrid_topology, true), 3740 3455 FEAT_OPR(PMU_CAPS, pmu_caps, false), 3456 + FEAT_OPR(CPU_DOMAIN_INFO, cpu_domain_info, true), 3741 3457 }; 3742 3458 3743 3459 struct header_print_data {
+1
tools/perf/util/header.h
··· 53 53 HEADER_CLOCK_DATA, 54 54 HEADER_HYBRID_TOPOLOGY, 55 55 HEADER_PMU_CAPS, 56 + HEADER_CPU_DOMAIN_INFO, 56 57 HEADER_LAST_FEATURE, 57 58 HEADER_FEAT_BITS = 256, 58 59 };
+42
tools/perf/util/util.c
··· 257 257 return 0; 258 258 } 259 259 260 + void cpumask_to_cpulist(char *cpumask, char *cpulist) 261 + { 262 + int i, j, bm_size, nbits; 263 + int len = strlen(cpumask); 264 + unsigned long *bm; 265 + char cpus[1024]; 266 + 267 + for (i = 0; i < len; i++) { 268 + if (cpumask[i] == ',') { 269 + for (j = i; j < len; j++) 270 + cpumask[j] = cpumask[j + 1]; 271 + } 272 + } 273 + 274 + len = strlen(cpumask); 275 + bm_size = (len + 15) / 16; 276 + nbits = bm_size * 64; 277 + if (nbits <= 0) 278 + return; 279 + 280 + bm = calloc(bm_size, sizeof(unsigned long)); 281 + if (!cpumask) 282 + goto free_bm; 283 + 284 + for (i = 0; i < bm_size; i++) { 285 + char blk[17]; 286 + int blklen = len > 16 ? 16 : len; 287 + 288 + strncpy(blk, cpumask + len - blklen, blklen); 289 + blk[blklen] = '\0'; 290 + bm[i] = strtoul(blk, NULL, 16); 291 + cpumask[len - blklen] = '\0'; 292 + len = strlen(cpumask); 293 + } 294 + 295 + bitmap_scnprintf(bm, nbits, cpus, sizeof(cpus)); 296 + strcpy(cpulist, cpus); 297 + 298 + free_bm: 299 + free(bm); 300 + } 301 + 260 302 int rm_rf_perf_data(const char *path) 261 303 { 262 304 const char *pat[] = {
+3
tools/perf/util/util.h
··· 11 11 #include <stdbool.h> 12 12 #include <stddef.h> 13 13 #include <linux/compiler.h> 14 + #include <linux/bitmap.h> 14 15 #include <sys/types.h> 15 16 #ifndef __cplusplus 16 17 #include <internal/cpumap.h> ··· 48 47 bool sysctl__nmi_watchdog_enabled(void); 49 48 50 49 int perf_tip(char **strp, const char *dirpath); 50 + 51 + void cpumask_to_cpulist(char *cpumask, char *cpulist); 51 52 52 53 #ifndef HAVE_SCHED_GETCPU_SUPPORT 53 54 int sched_getcpu(void);