Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

cpupower: cpupower monitor reports uninitialized values for offline cpus

[root@hp-dl980g7-02 linux]# cpupower monitor
...
5472| 0| 1|******|******|******|******|| 0.00| 0.00| 0.00| 0.00| 0.00 *is offline
10567| 0| 159|******|******|******|******|| 0.00| 0.00| 0.00| 0.00| 0.00 *is offline
1661206560|859272560| 150|******|******|******|******|| 0.00| 0.00| 0.00| 0.00| 0.00 *is offline
1661206560|943093104| 140|******|******|******|******|| 0.00| 0.00| 0.00| 0.00| 0.00 *is offline

Because of this, cpupower also holds an incorrect value for the number
of physical packages in the machine.

Changed cpupower to initialize the socket and core values of an offline
cpu to -1, to warn the user when one or more cpus are offline, and to
skip printing statistics for offline cpus.

This fix hides offlined cores where topology cannot be accessed.
With a recent kernel patch suggested by Prarit Bhargava, it may become
possible to still parse the topology of soft-offlined cores.
This patch would then show which cores in which package/socket are offline,
when sane topology information is available.

Signed-off-by: Jacob Tanenbaum <jtanenba@redhat.com>
Signed-off-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

authored by

Jacob Tanenbaum and committed by
Rafael J. Wysocki
20102ac5 32b88194

+23 -9
+15 -8
tools/power/cpupower/utils/helpers/topology.c
··· 73 73 for (cpu = 0; cpu < cpus; cpu++) { 74 74 cpu_top->core_info[cpu].cpu = cpu; 75 75 cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu); 76 - if (!cpu_top->core_info[cpu].is_online) 77 - continue; 78 76 if(sysfs_topology_read_file( 79 77 cpu, 80 78 "physical_package_id", 81 - &(cpu_top->core_info[cpu].pkg)) < 0) 82 - return -1; 79 + &(cpu_top->core_info[cpu].pkg)) < 0) { 80 + cpu_top->core_info[cpu].pkg = -1; 81 + cpu_top->core_info[cpu].core = -1; 82 + continue; 83 + } 83 84 if(sysfs_topology_read_file( 84 85 cpu, 85 86 "core_id", 86 - &(cpu_top->core_info[cpu].core)) < 0) 87 - return -1; 87 + &(cpu_top->core_info[cpu].core)) < 0) { 88 + cpu_top->core_info[cpu].pkg = -1; 89 + cpu_top->core_info[cpu].core = -1; 90 + continue; 91 + } 88 92 } 89 93 90 94 qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info), ··· 99 95 done by pkg value. */ 100 96 last_pkg = cpu_top->core_info[0].pkg; 101 97 for(cpu = 1; cpu < cpus; cpu++) { 102 - if(cpu_top->core_info[cpu].pkg != last_pkg) { 98 + if (cpu_top->core_info[cpu].pkg != last_pkg && 99 + cpu_top->core_info[cpu].pkg != -1) { 100 + 103 101 last_pkg = cpu_top->core_info[cpu].pkg; 104 102 cpu_top->pkgs++; 105 103 } 106 104 } 107 - cpu_top->pkgs++; 105 + if (!cpu_top->core_info[0].pkg == -1) 106 + cpu_top->pkgs++; 108 107 109 108 /* Intel's cores count is not consecutively numbered, there may 110 109 * be a core_id of 3, but none of 2. Assume there always is 0
+8 -1
tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
··· 143 143 /* Be careful CPUs may got resorted for pkg value do not just use cpu */ 144 144 if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu)) 145 145 return; 146 + if (!cpu_top.core_info[cpu].is_online && 147 + cpu_top.core_info[cpu].pkg == -1) 148 + return; 146 149 147 150 if (topology_depth > 2) 148 151 printf("%4d|", cpu_top.core_info[cpu].pkg); ··· 194 191 * It's up to the monitor plug-in to check .is_online, this one 195 192 * is just for additional info. 196 193 */ 197 - if (!cpu_top.core_info[cpu].is_online) { 194 + if (!cpu_top.core_info[cpu].is_online && 195 + cpu_top.core_info[cpu].pkg != -1) { 198 196 printf(_(" *is offline\n")); 199 197 return; 200 198 } else ··· 391 387 printf(_("Cannot read number of available processors\n")); 392 388 return EXIT_FAILURE; 393 389 } 390 + 391 + if (!cpu_top.core_info[0].is_online) 392 + printf("WARNING: at least one cpu is offline\n"); 394 393 395 394 /* Default is: monitor all CPUs */ 396 395 if (bitmask_isallclear(cpus_chosen))