Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ARCv2: perf: optimize given that num counters <= 32

Use the ffz primitive, which maps to an ARCv2 instruction, instead of the
non-atomic __test_and_set_bit.

It is unlikely that we will ever have more than 32 counters, but still add
a BUILD_BUG_ON to catch that.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>

+6 -8
+6 -8
arch/arc/kernel/perf_event.c
··· 336 336 struct hw_perf_event *hwc = &event->hw; 337 337 int idx = hwc->idx; 338 338 339 - if (__test_and_set_bit(idx, pmu_cpu->used_mask)) { 340 - idx = find_first_zero_bit(pmu_cpu->used_mask, 341 - arc_pmu->n_counters); 342 - if (idx == arc_pmu->n_counters) 343 - return -EAGAIN; 339 + idx = ffz(pmu_cpu->used_mask[0]); 340 + if (idx == arc_pmu->n_counters) 341 + return -EAGAIN; 344 342 345 - __set_bit(idx, pmu_cpu->used_mask); 346 - hwc->idx = idx; 347 - } 343 + __set_bit(idx, pmu_cpu->used_mask); 344 + hwc->idx = idx; 348 345 349 346 write_aux_reg(ARC_REG_PCT_INDEX, idx); 350 347 ··· 462 465 pr_err("This core does not have performance counters!\n"); 463 466 return -ENODEV; 464 467 } 468 + BUILD_BUG_ON(ARC_PERF_MAX_COUNTERS > 32); 465 469 BUG_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS); 466 470 467 471 READ_BCR(ARC_REG_CC_BUILD, cc_bcr);