x86/events/amd/iommu: Fix invalid Perf result due to IOMMU PMC power-gating

On certain AMD platforms, when the IOMMU performance counter source
(csource) field is zero, power-gating for the counter is enabled, which
prevents write access and returns zero for read access.
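
As a minimal sketch of this behavior (illustration only; 'struct pmc'
and the helpers below are hypothetical, not kernel code):

	#include <stdio.h>
	#include <stdint.h>

	struct pmc {
		uint8_t  csource;	/* 0 => counter is power-gated */
		uint64_t value;
	};

	/* A gated read returns zero. */
	static uint64_t pmc_read(const struct pmc *c)
	{
		return c->csource ? c->value : 0;
	}

	/* A gated write is not guaranteed to stick; model it as lost. */
	static void pmc_write(struct pmc *c, uint64_t val)
	{
		if (c->csource)
			c->value = val;
	}

	int main(void)
	{
		struct pmc c = { .csource = 0, .value = 0 };

		pmc_write(&c, 1234);	/* lost: counter still gated */
		printf("gated:   %llu\n", (unsigned long long)pmc_read(&c));

		c.csource = 1;		/* ungate first ... */
		pmc_write(&c, 1234);	/* ... then the write sticks */
		printf("ungated: %llu\n", (unsigned long long)pmc_read(&c));
		return 0;
	}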

This can cause an invalid Perf result, especially when event
multiplexing is needed (i.e. there are more events than available
counters), since the current logic keeps track of the previously read
counter value and subsequently re-programs the counter to continue
counting the event. With power-gating enabled, we cannot guarantee
successful re-programming of the counter.
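
For reference, the ordering being fixed looks like this (abridged from
the old perf_iommu_start() in the diff below). The counter register is
programmed while csource is still zero, i.e. while the counter may
still be power-gated, so the write can be lost:

	if (flags & PERF_EF_RELOAD) {
		u64 prev_raw_count = local64_read(&hwc->prev_count);

		/* With power-gating, this write may never reach the
		 * counter register, since csource is still zero. */
		amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
				     IOMMU_PC_COUNTER_REG, &prev_raw_count);
	}

	/* The csource only becomes non-zero here. */
	perf_iommu_enable_event(event);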

Work around this issue by:

1. Modifying the ordering of setting/reading counters and enabling/
   disabling csources to only access the counter when the csource
   is set to non-zero.

2. Since the AMD IOMMU PMU does not support interrupt mode, the
   logic can be simplified to always start counting with value zero
   and accumulate the counter value when stopping, without the need
   to track and re-program the counter with the previously read
   counter value (see the abridged flow below).
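
In outline, the resulting start/stop flow (abridged from the diff
below); perf_iommu_read() then simply accumulates the 48-bit count
into event->count:

	static void perf_iommu_start(struct perf_event *event, int flags)
	{
		...
		/* Ungate the counter (csource != 0) before touching it. */
		perf_iommu_enable_event(event);

		if (flags & PERF_EF_RELOAD) {
			u64 count = 0;
			...
			/* Counting always starts from zero. */
			amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
					     IOMMU_PC_COUNTER_REG, &count);
		}
		...
	}

	static void perf_iommu_stop(struct perf_event *event, int flags)
	{
		...
		/* Read while still ungated; a gated read returns zero. */
		perf_iommu_read(event);
		hwc->state |= PERF_HES_UPTODATE;

		perf_iommu_disable_event(event);
		...
	}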

This has been tested on systems with and without power-gating.

Fixes: 994d6608efe4 ("iommu/amd: Remove performance counter pre-initialization test")
Suggested-by: Alexander Monakov <amonakov@ispras.ru>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210504065236.4415-1-suravee.suthikulpanit@amd.com

 arch/x86/events/amd/iommu.c | 47 ++++++++++++++-----------
 1 file changed, 26 insertions(+), 21 deletions(-)

--- a/arch/x86/events/amd/iommu.c
+++ b/arch/x86/events/amd/iommu.c
@@ -18,8 +18,6 @@
 #include "../perf_event.h"
 #include "iommu.h"
 
-#define COUNTER_SHIFT	16
-
 /* iommu pmu conf masks */
 #define GET_CSOURCE(x)	((x)->conf & 0xFFULL)
 #define GET_DEVID(x)	(((x)->conf >> 8) & 0xFFFFULL)
@@ -285,22 +283,31 @@ static void perf_iommu_start(struct perf_event *event, int flags)
 	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
 	hwc->state = 0;
 
+	/*
+	 * To account for power-gating, which prevents write to
+	 * the counter, we need to enable the counter
+	 * before setting up counter register.
+	 */
+	perf_iommu_enable_event(event);
+
 	if (flags & PERF_EF_RELOAD) {
-		u64 prev_raw_count = local64_read(&hwc->prev_count);
+		u64 count = 0;
 		struct amd_iommu *iommu = perf_event_2_iommu(event);
 
+		/*
+		 * Since the IOMMU PMU only support counting mode,
+		 * the counter always start with value zero.
+		 */
 		amd_iommu_pc_set_reg(iommu, hwc->iommu_bank, hwc->iommu_cntr,
-				     IOMMU_PC_COUNTER_REG, &prev_raw_count);
+				     IOMMU_PC_COUNTER_REG, &count);
 	}
 
-	perf_iommu_enable_event(event);
 	perf_event_update_userpage(event);
-
 }
 
 static void perf_iommu_read(struct perf_event *event)
 {
-	u64 count, prev, delta;
+	u64 count;
 	struct hw_perf_event *hwc = &event->hw;
 	struct amd_iommu *iommu = perf_event_2_iommu(event);
 
@@ -311,14 +318,11 @@ static void perf_iommu_read(struct perf_event *event)
 	/* IOMMU pc counter register is only 48 bits */
 	count &= GENMASK_ULL(47, 0);
 
-	prev = local64_read(&hwc->prev_count);
-	if (local64_cmpxchg(&hwc->prev_count, prev, count) != prev)
-		return;
-
-	/* Handle 48-bit counter overflow */
-	delta = (count << COUNTER_SHIFT) - (prev << COUNTER_SHIFT);
-	delta >>= COUNTER_SHIFT;
-	local64_add(delta, &event->count);
+	/*
+	 * Since the counter always start with value zero,
+	 * simply just accumulate the count for the event.
+	 */
+	local64_add(count, &event->count);
 }
 
 static void perf_iommu_stop(struct perf_event *event, int flags)
@@ -328,15 +332,16 @@ static void perf_iommu_stop(struct perf_event *event, int flags)
 	if (hwc->state & PERF_HES_UPTODATE)
 		return;
 
+	/*
+	 * To account for power-gating, in which reading the counter would
+	 * return zero, we need to read the register before disabling.
+	 */
+	perf_iommu_read(event);
+	hwc->state |= PERF_HES_UPTODATE;
+
 	perf_iommu_disable_event(event);
 	WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
 	hwc->state |= PERF_HES_STOPPED;
-
-	if (hwc->state & PERF_HES_UPTODATE)
-		return;
-
-	perf_iommu_read(event);
-	hwc->state |= PERF_HES_UPTODATE;
 }
 
 static int perf_iommu_add(struct perf_event *event, int flags)