tools/power turbostat: Allow Zero return value for some RAPL registers

turbostat aborted with the messages below on a dual-package system:

turbostat: turbostat.c:3744: rapl_counter_accumulate: Assertion `dst->unit == src->unit' failed.
Aborted

This is because:
1. MSR_DRAM_PERF_STATUS returns zero for one package and non-zero
for the other package.
2. probe_msr() treats a zero return value as a failure, so the feature is
enabled on one package and disabled on the other.
3. turbostat aborts because the feature is invalid on one of the packages.

Unlike the RAPL energy counter registers, MSR_DRAM_PERF_STATUS can
return a zero value, and this should not be treated as a failure.

Fix the problem by allowing a zero return value for RAPL registers other
than the energy counters.

Fixes: 7c6fee25bdf5 ("tools/power turbostat: Check for non-zero value when MSR probing")
Reported-by: Artem Bityutskiy <artem.bityutskiy@intel.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>

authored by Zhang Rui and committed by Len Brown b312d880 1c7c7388

+18 -9
+18 -9
tools/power/x86/turbostat/turbostat.c
··· 2211 2211 return 0; 2212 2212 } 2213 2213 2214 - int probe_msr(int cpu, off_t offset) 2214 + int probe_rapl_msr(int cpu, off_t offset, int index) 2215 2215 { 2216 2216 ssize_t retval; 2217 2217 unsigned long long value; ··· 2220 2220 2221 2221 retval = pread(get_msr_fd(cpu), &value, sizeof(value), offset); 2222 2222 2223 - /* 2224 - * Expect MSRs to accumulate some non-zero value since the system was powered on. 2225 - * Treat zero as a read failure. 2226 - */ 2227 - if (retval != sizeof(value) || value == 0) 2223 + /* if the read failed, the probe fails */ 2224 + if (retval != sizeof(value)) 2228 2225 return 1; 2229 2226 2227 + /* If an Energy Status Counter MSR returns 0, the probe fails */ 2228 + switch (index) { 2229 + case RAPL_RCI_INDEX_ENERGY_PKG: 2230 + case RAPL_RCI_INDEX_ENERGY_CORES: 2231 + case RAPL_RCI_INDEX_DRAM: 2232 + case RAPL_RCI_INDEX_GFX: 2233 + case RAPL_RCI_INDEX_ENERGY_PLATFORM: 2234 + if (value == 0) 2235 + return 1; 2236 + } 2237 + 2238 + /* PKG,DRAM_PERF_STATUS MSRs, can return any value */ 2230 2239 return 0; 2231 2240 } 2232 2241 ··· 7916 7907 rci->flags[cai->rci_index] = cai->flags; 7917 7908 7918 7909 /* Use MSR for this counter */ 7919 - } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { 7910 + } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { 7920 7911 rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 7921 7912 rci->msr[cai->rci_index] = cai->msr; 7922 7913 rci->msr_mask[cai->rci_index] = cai->msr_mask; ··· 8054 8045 cai->present = true; 8055 8046 8056 8047 /* User MSR for this counter */ 8057 - } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { 8048 + } else if (!no_msr && cai->msr && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { 8058 8049 cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 8059 8050 cci->msr[cai->rci_index] = cai->msr; 8060 8051 cci->msr_mask[cai->rci_index] = cai->msr_mask; ··· 8168 8159 8169 8160 /* User MSR for this counter */ 8170 
8161 } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit 8171 - && probe_msr(cpu, cai->msr) == 0) { 8162 + && probe_rapl_msr(cpu, cai->msr, cai->rci_index) == 0) { 8172 8163 cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; 8173 8164 cci->msr[cai->rci_index] = cai->msr; 8174 8165 }