powerpc/perf_event: Skip updating kernel counters if register value shrinks

Because of speculative event roll back, it is possible for some event counters
to decrease between reads on POWER7. This causes a problem with the way the
counters are updated: deltas are calculated in a 64-bit value and the top 32
bits are masked. If the register value has decreased, this leaves us with a
very large positive value added to the kernel counters. This patch protects
against that by skipping the update if the delta would be negative. This can
lead to a small loss of precision in the counter values, but from my testing
the error is typically fewer than 10 samples at a time.
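
For illustration only (not part of the patch): a minimal user-space sketch of
the failure mode described above, with made-up sample readings. Under the
32-bit mask, a counter that rolls back by even two events produces a huge
positive delta, while a genuine 32-bit wrap produces the small delta you
would expect.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical PMC readings, for illustration only. */
	uint64_t prev_rollback = 1000, val_rollback = 998;    /* rolled back by 2 */
	uint64_t prev_wrap = 0xfffffff0ull, val_wrap = 0x10;  /* genuine 32-bit wrap */

	/* The old calculation: subtract in 64 bits, keep the low 32 bits. */
	uint64_t delta_rollback = (val_rollback - prev_rollback) & 0xfffffffful;
	uint64_t delta_wrap     = (val_wrap - prev_wrap) & 0xfffffffful;

	/* Rollback case prints 0xfffffffe (~4.29 billion bogus events). */
	printf("rollback delta: 0x%llx\n", (unsigned long long)delta_rollback);
	/* Wrap case prints 0x20, the correct distance across the overflow. */
	printf("wrap delta:     0x%llx\n", (unsigned long long)delta_wrap);
	return 0;
}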

Signed-off-by: Eric B Munson <emunson@mgebm.net>
Cc: stable@kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Authored by Eric B Munson and committed by Benjamin Herrenschmidt (86c74ab3, 09597cfe)

+30 -7
arch/powerpc/kernel/perf_event.c
···
 	return 0;
 }
 
+static u64 check_and_compute_delta(u64 prev, u64 val)
+{
+	u64 delta = (val - prev) & 0xfffffffful;
+
+	/*
+	 * POWER7 can roll back counter values, if the new value is smaller
+	 * than the previous value it will cause the delta and the counter to
+	 * have bogus values unless we rolled a counter over.  If a counter is
+	 * rolled back, it will be smaller, but within 256, which is the maximum
+	 * number of events to rollback at once.  If we detect a rollback
+	 * return 0.  This can lead to a small lack of precision in the
+	 * counters.
+	 */
+	if (prev > val && (prev - val) < 256)
+		delta = 0;
+
+	return delta;
+}
+
 static void power_pmu_read(struct perf_event *event)
 {
 	s64 val, delta, prev;
···
 		prev = local64_read(&event->hw.prev_count);
 		barrier();
 		val = read_pmc(event->hw.idx);
+		delta = check_and_compute_delta(prev, val);
+		if (!delta)
+			return;
 	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
-	/* The counters are only 32 bits wide */
-	delta = (val - prev) & 0xfffffffful;
 	local64_add(delta, &event->count);
 	local64_sub(delta, &event->hw.period_left);
 }
···
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
 		prev = local64_read(&event->hw.prev_count);
 		event->hw.idx = 0;
-		delta = (val - prev) & 0xfffffffful;
-		local64_add(delta, &event->count);
+		delta = check_and_compute_delta(prev, val);
+		if (delta)
+			local64_add(delta, &event->count);
 	}
 }
···
 			  unsigned long pmc5, unsigned long pmc6)
 {
 	struct perf_event *event;
-	u64 val;
+	u64 val, prev;
 	int i;
 
 	for (i = 0; i < cpuhw->n_limited; ++i) {
 		event = cpuhw->limited_counter[i];
 		event->hw.idx = cpuhw->limited_hwidx[i];
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
-		local64_set(&event->hw.prev_count, val);
+		prev = local64_read(&event->hw.prev_count);
+		if (check_and_compute_delta(prev, val))
+			local64_set(&event->hw.prev_count, val);
 		perf_event_update_userpage(event);
 	}
 }
···
 
 	/* we don't have to worry about interrupts here */
 	prev = local64_read(&event->hw.prev_count);
-	delta = (val - prev) & 0xfffffffful;
+	delta = check_and_compute_delta(prev, val);
 	local64_add(delta, &event->count);
 
 	/*
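
For reference, a standalone sketch of how the new helper behaves: the body of
check_and_compute_delta is copied from the hunk above, while the test harness
and the sample values are illustrative assumptions, not part of the kernel
code. A genuine 32-bit wrap keeps its correct delta; a small backwards move
is treated as a speculative rollback and reports no progress.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/*
 * Copied from the patch: a backwards move smaller than 256 is treated as a
 * speculative rollback and reported as no progress; anything else gets the
 * usual 32-bit-masked delta.
 */
static u64 check_and_compute_delta(u64 prev, u64 val)
{
	u64 delta = (val - prev) & 0xfffffffful;

	if (prev > val && (prev - val) < 256)
		delta = 0;

	return delta;
}

int main(void)
{
	/* Genuine 32-bit wrap: the correct delta of 0x20 survives. */
	printf("wrap:     0x%llx\n",
	       (unsigned long long)check_and_compute_delta(0xfffffff0ull, 0x10));
	/* Speculative rollback by 2: delta is forced to 0, update is skipped. */
	printf("rollback: 0x%llx\n",
	       (unsigned long long)check_and_compute_delta(1000, 998));
	return 0;
}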