
perf, x86: Implement event scheduler helper functions

This patch introduces helper functions for the x86 perf event scheduler.
They are needed to later add more complex functionality to support
overlapping counter constraints (next patch).

The algorithm is modified so that the range of weight values is now
derived from the constraints themselves. There should be no other
functional changes.

The scheduler is now driven through these helper functions: there are
functions to initialize the iterator, traverse the event list, find an
unused counter, and so on. The scheduler keeps its own state.
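
To make the least-weight-first idea concrete before reading the diff below,
here is a minimal user-space sketch of the same greedy strategy. The
struct constraint type, the assign_events() helper and the example
constraints are purely illustrative and are not part of the patch:

  #include <stdio.h>

  #define NUM_COUNTERS 4

  /* Illustrative stand-in for struct event_constraint (not the kernel type). */
  struct constraint {
  	unsigned long idxmsk;	/* counters this event may run on */
  	int weight;		/* number of usable counters */
  };

  /* Assign counters, most constrained (lowest weight) events first. */
  static int assign_events(struct constraint *c, int n, int wmin, int wmax,
  			 int *assign)
  {
  	unsigned long used = 0;
  	int unassigned = n;
  	int w, e, idx;

  	for (w = wmin; w <= wmax; w++) {
  		for (e = 0; e < n; e++) {
  			if (c[e].weight != w)
  				continue;
  			for (idx = 0; idx < NUM_COUNTERS; idx++) {
  				if ((c[e].idxmsk & (1UL << idx)) &&
  				    !(used & (1UL << idx))) {
  					used |= 1UL << idx;
  					assign[e] = idx;
  					unassigned--;
  					break;
  				}
  			}
  		}
  	}
  	return unassigned;	/* 0 when every event got a counter */
  }

  int main(void)
  {
  	/* Events 0 and 1 may use any of four counters; event 2 only counter 0. */
  	struct constraint c[] = {
  		{ .idxmsk = 0xf,      .weight = 4 },
  		{ .idxmsk = 0xf,      .weight = 4 },
  		{ .idxmsk = 1UL << 0, .weight = 1 },
  	};
  	int assign[3];

  	if (!assign_events(c, 3, 1, 4, assign))
  		printf("counters: %d %d %d\n", assign[0], assign[1], assign[2]);
  	return 0;
  }

A naive front-to-back pass over the same events could hand counter 0 to
event 0 and then fail on event 2; taking the most constrained (lowest
weight) events first avoids that.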

V3:
* Added macro for_each_set_bit_cont().
* Changed the interfaces of perf_sched_find_counter() and
  perf_sched_next_event() to return bool.
* Added some comments to make the code easier to understand.

V4:
* Fixed broken event assignment when the weight of the first event is not
  wmin (perf_sched_init()).

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1321616122-1533-2-git-send-email-robert.richter@amd.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Authored by Robert Richter, committed by Ingo Molnar
1e2ad28f 0f5a2601

+140 -55 (total)

arch/x86/kernel/cpu/perf_event.c  +132 -53
···
  		return event->pmu == &pmu;
  }
  
+ /*
+  * Event scheduler state:
+  *
+  * Assign events iterating over all events and counters, beginning
+  * with events with least weights first. Keep the current iterator
+  * state in struct sched_state.
+  */
+ struct sched_state {
+ 	int	weight;
+ 	int	event;		/* event index */
+ 	int	counter;	/* counter index */
+ 	int	unassigned;	/* number of events to be assigned left */
+ 	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+ };
+ 
+ struct perf_sched {
+ 	int	max_weight;
+ 	int	max_events;
+ 	struct event_constraint **constraints;
+ 	struct sched_state state;
+ };
+ 
+ /*
+  * Initialize interator that runs through all events and counters.
+  */
+ static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+ 			    int num, int wmin, int wmax)
+ {
+ 	int idx;
+ 
+ 	memset(sched, 0, sizeof(*sched));
+ 	sched->max_events	= num;
+ 	sched->max_weight	= wmax;
+ 	sched->constraints	= c;
+ 
+ 	for (idx = 0; idx < num; idx++) {
+ 		if (c[idx]->weight == wmin)
+ 			break;
+ 	}
+ 
+ 	sched->state.event	= idx;		/* start with min weight */
+ 	sched->state.weight	= wmin;
+ 	sched->state.unassigned	= num;
+ }
+ 
+ /*
+  * Select a counter for the current event to schedule. Return true on
+  * success.
+  */
+ static bool perf_sched_find_counter(struct perf_sched *sched)
+ {
+ 	struct event_constraint *c;
+ 	int idx;
+ 
+ 	if (!sched->state.unassigned)
+ 		return false;
+ 
+ 	if (sched->state.event >= sched->max_events)
+ 		return false;
+ 
+ 	c = sched->constraints[sched->state.event];
+ 
+ 	/* Grab the first unused counter starting with idx */
+ 	idx = sched->state.counter;
+ 	for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+ 		if (!__test_and_set_bit(idx, sched->state.used))
+ 			break;
+ 	}
+ 	sched->state.counter = idx;
+ 
+ 	if (idx >= X86_PMC_IDX_MAX)
+ 		return false;
+ 
+ 	return true;
+ }
+ 
+ /*
+  * Go through all unassigned events and find the next one to schedule.
+  * Take events with the least weight first. Return true on success.
+  */
+ static bool perf_sched_next_event(struct perf_sched *sched)
+ {
+ 	struct event_constraint *c;
+ 
+ 	if (!sched->state.unassigned || !--sched->state.unassigned)
+ 		return false;
+ 
+ 	do {
+ 		/* next event */
+ 		sched->state.event++;
+ 		if (sched->state.event >= sched->max_events) {
+ 			/* next weight */
+ 			sched->state.event = 0;
+ 			sched->state.weight++;
+ 			if (sched->state.weight > sched->max_weight)
+ 				return false;
+ 		}
+ 		c = sched->constraints[sched->state.event];
+ 	} while (c->weight != sched->state.weight);
+ 
+ 	sched->state.counter = 0;	/* start with first counter */
+ 
+ 	return true;
+ }
+ 
+ /*
+  * Assign a counter for each event.
+  */
+ static int perf_assign_events(struct event_constraint **constraints, int n,
+ 			      int wmin, int wmax, int *assign)
+ {
+ 	struct perf_sched sched;
+ 
+ 	perf_sched_init(&sched, constraints, n, wmin, wmax);
+ 
+ 	do {
+ 		if (!perf_sched_find_counter(&sched))
+ 			break;	/* failed */
+ 		if (assign)
+ 			assign[sched.state.event] = sched.state.counter;
+ 	} while (perf_sched_next_event(&sched));
+ 
+ 	return sched.state.unassigned;
+ }
+ 
  int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
  {
  	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
  	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
- 	int i, j, w, wmax, num = 0;
+ 	int i, wmin, wmax, num = 0;
  	struct hw_perf_event *hwc;
  
  	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
  
- 	for (i = 0; i < n; i++) {
+ 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
  		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
  		constraints[i] = c;
+ 		wmin = min(wmin, c->weight);
+ 		wmax = max(wmax, c->weight);
  	}
  
  	/*
···
  		if (assign)
  			assign[i] = hwc->idx;
  	}
- 	if (i == n)
- 		goto done;
  
- 	/*
- 	 * begin slow path
- 	 */
+ 	/* slow path */
+ 	if (i != n)
+ 		num = perf_assign_events(constraints, n, wmin, wmax, assign);
  
- 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
- 
- 	/*
- 	 * weight = number of possible counters
- 	 *
- 	 * 1 = most constrained, only works on one counter
- 	 * wmax = least constrained, works on any counter
- 	 *
- 	 * assign events to counters starting with most
- 	 * constrained events.
- 	 */
- 	wmax = x86_pmu.num_counters;
- 
- 	/*
- 	 * when fixed event counters are present,
- 	 * wmax is incremented by 1 to account
- 	 * for one more choice
- 	 */
- 	if (x86_pmu.num_counters_fixed)
- 		wmax++;
- 
- 	for (w = 1, num = n; num && w <= wmax; w++) {
- 		/* for each event */
- 		for (i = 0; num && i < n; i++) {
- 			c = constraints[i];
- 			hwc = &cpuc->event_list[i]->hw;
- 
- 			if (c->weight != w)
- 				continue;
- 
- 			for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
- 				if (!test_bit(j, used_mask))
- 					break;
- 			}
- 
- 			if (j == X86_PMC_IDX_MAX)
- 				break;
- 
- 			__set_bit(j, used_mask);
- 
- 			if (assign)
- 				assign[i] = j;
- 			num--;
- 		}
- 	}
- done:
  	/*
  	 * scheduling failed or is just a simulation,
  	 * free resources if necessary
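
To see the order in which the new iterator walks the event list, here is a
tiny stand-alone sketch (hypothetical weights, plain user-space C, not kernel
code) that mimics the weight-grouped traversal of perf_sched_next_event():

  #include <stdio.h>

  int main(void)
  {
  	/* Hypothetical constraint weights for four events. */
  	int weight[] = { 3, 1, 3, 2 };
  	int n = 4, wmin = 1, wmax = 3;
  	int w, e;

  	/*
  	 * Like perf_sched_next_event(): sweep the event list once per
  	 * weight value, lowest weight first.  Visits events 1, 3, 0, 2.
  	 */
  	for (w = wmin; w <= wmax; w++)
  		for (e = 0; e < n; e++)
  			if (weight[e] == w)
  				printf("schedule event %d (weight %d)\n", e, w);
  	return 0;
  }

The real iterator additionally remembers a per-event counter index
(sched->state.counter) so that perf_sched_find_counter() can resume its
bitmap scan where it left off.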
include/linux/bitops.h  +8 -2
···
  #include <asm/bitops.h>
  
  #define for_each_set_bit(bit, addr, size) \
- 	for ((bit) = find_first_bit((addr), (size)); \
- 	     (bit) < (size); \
+ 	for ((bit) = find_first_bit((addr), (size)); \
+ 	     (bit) < (size); \
+ 	     (bit) = find_next_bit((addr), (size), (bit) + 1))
+ 
+ /* same as for_each_set_bit() but use bit as value to start with */
+ #define for_each_set_bit_cont(bit, addr, size) \
+ 	for ((bit) = find_next_bit((addr), (size), (bit)); \
+ 	     (bit) < (size); \
  	     (bit) = find_next_bit((addr), (size), (bit) + 1))
  
  static __inline__ int get_bitmask_order(unsigned int count)
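
To illustrate the difference between the two macros, here is a small
user-space demo. The find_next_bit() below is a simplified stub for the
demo only, not the kernel's implementation, and the bitmask value is made
up:

  #include <stdio.h>

  /* Simplified user-space stub of the kernel's find_next_bit(). */
  static unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
  				   unsigned long offset)
  {
  	while (offset < size &&
  	       !(addr[offset / (8 * sizeof(long))] &
  		 (1UL << (offset % (8 * sizeof(long))))))
  		offset++;
  	return offset;
  }

  #define find_first_bit(addr, size) find_next_bit((addr), (size), 0)

  #define for_each_set_bit(bit, addr, size) \
  	for ((bit) = find_first_bit((addr), (size)); \
  	     (bit) < (size); \
  	     (bit) = find_next_bit((addr), (size), (bit) + 1))

  /* same as for_each_set_bit() but use bit as value to start with */
  #define for_each_set_bit_cont(bit, addr, size) \
  	for ((bit) = find_next_bit((addr), (size), (bit)); \
  	     (bit) < (size); \
  	     (bit) = find_next_bit((addr), (size), (bit) + 1))

  int main(void)
  {
  	unsigned long mask[1] = { 0xb1 };	/* bits 0, 4, 5 and 7 are set */
  	unsigned long bit;

  	for_each_set_bit(bit, mask, 8 * sizeof(mask[0]))
  		printf("set: %lu\n", bit);	/* 0, 4, 5, 7 */

  	bit = 4;	/* resume at bit 4, like sched->state.counter */
  	for_each_set_bit_cont(bit, mask, 8 * sizeof(mask[0]))
  		printf("cont: %lu\n", bit);	/* 4, 5, 7 */

  	return 0;
  }

This resumable form is what perf_sched_find_counter() relies on: it starts
the scan at the saved counter index instead of always beginning at bit 0.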