powerpc/perf_event: Skip updating kernel counters if register value shrinks

Because of speculative event roll back, it is possible for some event
counters to decrease between reads on POWER7. This causes a problem with
the way that counters are updated. Delta values are calculated in a
64-bit value and the top 32 bits are masked. If the register value has
decreased, this leaves us with a very large positive value added to the
kernel counters. This patch protects against this by skipping the update
if the delta would be negative. This can lead to a lack of precision in
the counter values, but from my testing the value is typically fewer
than 10 samples at a time.
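
For illustration, a minimal userspace sketch (not kernel code; the
counter values are made up) of how the old masked subtraction turns a
small roll-back into a huge positive delta:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical PMC reads: the counter rolled back by 3 events. */
	uint64_t prev = 1000;
	uint64_t val  = 997;

	/* Old delta calculation: 64-bit subtraction masked to 32 bits. */
	uint64_t delta = (val - prev) & 0xfffffffful;

	/* Prints 4294967293 (0xfffffffd), a bogus huge positive delta. */
	printf("delta = %llu\n", (unsigned long long)delta);
	return 0;
}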

Signed-off-by: Eric B Munson <emunson@mgebm.net>
Cc: stable@kernel.org
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

+30 -7
arch/powerpc/kernel/perf_event.c
@@ -398,6 +398,25 @@
 	return 0;
 }
 
+static u64 check_and_compute_delta(u64 prev, u64 val)
+{
+	u64 delta = (val - prev) & 0xfffffffful;
+
+	/*
+	 * POWER7 can roll back counter values; if the new value is smaller
+	 * than the previous value it will cause the delta and the counter to
+	 * have bogus values unless we rolled a counter over. If a counter is
+	 * rolled back, it will be smaller, but within 256, which is the maximum
+	 * number of events to rollback at once. If we detect a rollback
+	 * return 0. This can lead to a small lack of precision in the
+	 * counters.
+	 */
+	if (prev > val && (prev - val) < 256)
+		delta = 0;
+
+	return delta;
+}
+
 static void power_pmu_read(struct perf_event *event)
 {
 	s64 val, delta, prev;
@@ -416,10 +435,11 @@
 		prev = local64_read(&event->hw.prev_count);
 		barrier();
 		val = read_pmc(event->hw.idx);
+		delta = check_and_compute_delta(prev, val);
+		if (!delta)
+			return;
 	} while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
 
-	/* The counters are only 32 bits wide */
-	delta = (val - prev) & 0xfffffffful;
 	local64_add(delta, &event->count);
 	local64_sub(delta, &event->hw.period_left);
 }
@@ -449,8 +469,9 @@
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
 		prev = local64_read(&event->hw.prev_count);
 		event->hw.idx = 0;
-		delta = (val - prev) & 0xfffffffful;
-		local64_add(delta, &event->count);
+		delta = check_and_compute_delta(prev, val);
+		if (delta)
+			local64_add(delta, &event->count);
 	}
 }
 
@@ -458,14 +479,16 @@
 				  unsigned long pmc5, unsigned long pmc6)
 {
 	struct perf_event *event;
-	u64 val;
+	u64 val, prev;
 	int i;
 
 	for (i = 0; i < cpuhw->n_limited; ++i) {
 		event = cpuhw->limited_counter[i];
 		event->hw.idx = cpuhw->limited_hwidx[i];
 		val = (event->hw.idx == 5) ? pmc5 : pmc6;
-		local64_set(&event->hw.prev_count, val);
+		prev = local64_read(&event->hw.prev_count);
+		if (check_and_compute_delta(prev, val))
+			local64_set(&event->hw.prev_count, val);
 		perf_event_update_userpage(event);
 	}
 }
@@ -1197,7 +1220,7 @@
 
 	/* we don't have to worry about interrupts here */
 	prev = local64_read(&event->hw.prev_count);
-	delta = (val - prev) & 0xfffffffful;
+	delta = check_and_compute_delta(prev, val);
 	local64_add(delta, &event->count);
 
 	/*
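
As a sanity check, the new helper can be exercised in a standalone
userspace harness; a sketch with made-up values, with the kernel's
local64/PMC plumbing omitted:

#include <stdio.h>
#include <stdint.h>

/* Copy of the new helper, compiled standalone for illustration. */
static uint64_t check_and_compute_delta(uint64_t prev, uint64_t val)
{
	uint64_t delta = (val - prev) & 0xfffffffful;

	if (prev > val && (prev - val) < 256)
		delta = 0;

	return delta;
}

int main(void)
{
	/* Normal forward progress: prints 5. */
	printf("%llu\n", (unsigned long long)check_and_compute_delta(1000, 1005));
	/* Roll-back within 256 events: treated as no progress, prints 0. */
	printf("%llu\n", (unsigned long long)check_and_compute_delta(1000, 997));
	/* Genuine 32-bit wrap (prev near the top, val past zero): prints 32. */
	printf("%llu\n", (unsigned long long)check_and_compute_delta(0xfffffff0ull, 0x10ull));
	return 0;
}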