Merge branch 'pm-tools'

Merge turbostat utility fixes for final 4.18:

- Fix the -S option on 1-CPU systems.
- Fix computations using incorrect processor core counts.
- Fix the x2apic debug message.
- Fix logical node enumeration to allow for non-sequential physical nodes.
- Fix reported family on modern AMD processors.
- Clarify the RAPL column information in the man page.

* pm-tools:
tools/power turbostat: version 18.07.27
tools/power turbostat: Read extended processor family from CPUID
tools/power turbostat: Fix logical node enumeration to allow for non-sequential physical nodes
tools/power turbostat: fix x2apic debug message output file
tools/power turbostat: fix bogus summary values
tools/power turbostat: fix -S on UP systems
tools/power turbostat: Update turbostat(8) RAPL throttling column description

+57 -63
+2 -2
tools/power/x86/turbostat/turbostat.8
··· 106 \fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. 107 \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. 108 \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. 109 - \fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. 110 \fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms. 111 \fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz. 112 \fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters. ··· 114 \fBCorWatt\fP Watts consumed by the core part of the package. 115 \fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors. 116 \fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors. 117 - \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. 118 \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. 119 .fi 120 .SH TOO MUCH INFORMATION EXAMPLE
··· 106 \fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. 107 \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. 108 \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. 109 + \fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. 110 \fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms. 111 \fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz. 112 \fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters. ··· 114 \fBCorWatt\fP Watts consumed by the core part of the package. 115 \fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors. 116 \fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors. 117 + \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary. 118 \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. 119 .fi 120 .SH TOO MUCH INFORMATION EXAMPLE
+55 -61
tools/power/x86/turbostat/turbostat.c
··· 1163 if (!printed || !summary_only) 1164 print_header("\t"); 1165 1166 - if (topo.num_cpus > 1) 1167 - format_counters(&average.threads, &average.cores, 1168 - &average.packages); 1169 1170 printed = 1; 1171 ··· 1690 t->x2apic_id = edx; 1691 1692 if (debug && (t->apic_id != t->x2apic_id)) 1693 - fprintf(stderr, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); 1694 } 1695 1696 /* ··· 2471 2472 void set_node_data(void) 2473 { 2474 - char path[80]; 2475 - FILE *filep; 2476 - int pkg, node, cpu; 2477 2478 - struct pkg_node_info { 2479 - int count; 2480 - int min; 2481 - } *pni; 2482 2483 - pni = calloc(topo.num_packages, sizeof(struct pkg_node_info)); 2484 - if (!pni) 2485 - err(1, "calloc pkg_node_count"); 2486 - 2487 - for (pkg = 0; pkg < topo.num_packages; pkg++) 2488 - pni[pkg].min = topo.num_cpus; 2489 - 2490 - for (node = 0; node <= topo.max_node_num; node++) { 2491 - /* find the "first" cpu in the node */ 2492 - sprintf(path, "/sys/bus/node/devices/node%d/cpulist", node); 2493 - filep = fopen(path, "r"); 2494 - if (!filep) 2495 - continue; 2496 - fscanf(filep, "%d", &cpu); 2497 - fclose(filep); 2498 - 2499 - pkg = cpus[cpu].physical_package_id; 2500 - pni[pkg].count++; 2501 - 2502 - if (node < pni[pkg].min) 2503 - pni[pkg].min = node; 2504 } 2505 - 2506 - for (pkg = 0; pkg < topo.num_packages; pkg++) 2507 - if (pni[pkg].count > topo.nodes_per_pkg) 2508 - topo.nodes_per_pkg = pni[0].count; 2509 - 2510 - /* Fake 1 node per pkg for machines that don't 2511 - * expose nodes and thus avoid -nan results 2512 - */ 2513 - if (topo.nodes_per_pkg == 0) 2514 - topo.nodes_per_pkg = 1; 2515 - 2516 - for (cpu = 0; cpu < topo.num_cpus; cpu++) { 2517 - pkg = cpus[cpu].physical_package_id; 2518 - node = cpus[cpu].physical_node_id; 2519 - cpus[cpu].logical_node_id = node - pni[pkg].min; 2520 - } 2521 - free(pni); 2522 - 2523 } 2524 2525 int get_physical_node_id(struct cpu_topology *thiscpu) ··· 4457 family = (fms >> 8) & 0xf; 4458 model = (fms >> 4) & 0xf; 4459 stepping = fms & 0xf; 4460 - if (family == 6 || family == 0xf) 4461 model += ((fms >> 16) & 0xf) << 4; 4462 4463 if (!quiet) { ··· 4828 siblings = get_thread_siblings(&cpus[i]); 4829 if (siblings > max_siblings) 4830 max_siblings = siblings; 4831 - if (cpus[i].thread_id != -1) 4832 topo.num_cores++; 4833 - 4834 - if (debug > 1) 4835 - fprintf(outf, 4836 - "cpu %d pkg %d node %d core %d thread %d\n", 4837 - i, cpus[i].physical_package_id, 4838 - cpus[i].physical_node_id, 4839 - cpus[i].physical_core_id, 4840 - cpus[i].thread_id); 4841 } 4842 4843 topo.cores_per_node = max_core_id + 1; ··· 4855 topo.threads_per_core = max_siblings; 4856 if (debug > 1) 4857 fprintf(outf, "max_siblings %d\n", max_siblings); 4858 } 4859 4860 void ··· 5096 } 5097 5098 void print_version() { 5099 - fprintf(outf, "turbostat version 18.06.20" 5100 " - Len Brown <lenb@kernel.org>\n"); 5101 } 5102
··· 1163 if (!printed || !summary_only) 1164 print_header("\t"); 1165 1166 + format_counters(&average.threads, &average.cores, &average.packages); 1167 1168 printed = 1; 1169 ··· 1692 t->x2apic_id = edx; 1693 1694 if (debug && (t->apic_id != t->x2apic_id)) 1695 + fprintf(outf, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); 1696 } 1697 1698 /* ··· 2473 2474 void set_node_data(void) 2475 { 2476 + int pkg, node, lnode, cpu, cpux; 2477 + int cpu_count; 2478 2479 + /* initialize logical_node_id */ 2480 + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) 2481 + cpus[cpu].logical_node_id = -1; 2482 2483 + cpu_count = 0; 2484 + for (pkg = 0; pkg < topo.num_packages; pkg++) { 2485 + lnode = 0; 2486 + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { 2487 + if (cpus[cpu].physical_package_id != pkg) 2488 + continue; 2489 + /* find a cpu with an unset logical_node_id */ 2490 + if (cpus[cpu].logical_node_id != -1) 2491 + continue; 2492 + cpus[cpu].logical_node_id = lnode; 2493 + node = cpus[cpu].physical_node_id; 2494 + cpu_count++; 2495 + /* 2496 + * find all matching cpus on this pkg and set 2497 + * the logical_node_id 2498 + */ 2499 + for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) { 2500 + if ((cpus[cpux].physical_package_id == pkg) && 2501 + (cpus[cpux].physical_node_id == node)) { 2502 + cpus[cpux].logical_node_id = lnode; 2503 + cpu_count++; 2504 + } 2505 + } 2506 + lnode++; 2507 + if (lnode > topo.nodes_per_pkg) 2508 + topo.nodes_per_pkg = lnode; 2509 + } 2510 + if (cpu_count >= topo.max_cpu_num) 2511 + break; 2512 } 2513 } 2514 2515 int get_physical_node_id(struct cpu_topology *thiscpu) ··· 4471 family = (fms >> 8) & 0xf; 4472 model = (fms >> 4) & 0xf; 4473 stepping = fms & 0xf; 4474 + if (family == 0xf) 4475 + family += (fms >> 20) & 0xff; 4476 + if (family >= 6) 4477 model += ((fms >> 16) & 0xf) << 4; 4478 4479 if (!quiet) { ··· 4840 siblings = get_thread_siblings(&cpus[i]); 4841 if (siblings > max_siblings) 4842 max_siblings = siblings; 4843 + if (cpus[i].thread_id == 0) 4844 topo.num_cores++; 4845 } 4846 4847 topo.cores_per_node = max_core_id + 1; ··· 4875 topo.threads_per_core = max_siblings; 4876 if (debug > 1) 4877 fprintf(outf, "max_siblings %d\n", max_siblings); 4878 + 4879 + if (debug < 1) 4880 + return; 4881 + 4882 + for (i = 0; i <= topo.max_cpu_num; ++i) { 4883 + fprintf(outf, 4884 + "cpu %d pkg %d node %d lnode %d core %d thread %d\n", 4885 + i, cpus[i].physical_package_id, 4886 + cpus[i].physical_node_id, 4887 + cpus[i].logical_node_id, 4888 + cpus[i].physical_core_id, 4889 + cpus[i].thread_id); 4890 + } 4891 + 4892 } 4893 4894 void ··· 5102 } 5103 5104 void print_version() { 5105 + fprintf(outf, "turbostat version 18.07.27" 5106 " - Len Brown <lenb@kernel.org>\n"); 5107 } 5108