Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf/x86/intel/rapl: Convert it to a per package facility

RAPL is a per package facility and we already have a mechanism for a dedicated
per package reader. So there is no point in having multiple CPUs doing the
same. The current implementation actually starts two timers on two CPUs if one
does:

perf stat -C1,2 -e power/energy-pkg ....

which makes the whole concept of 1 reader per package moot.

What's worse is that the above returns double the actual energy
consumption, but that's a different problem to address and cannot be solved by
removing the pointless per-CPU-ness of that mechanism.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andi Kleen <andi.kleen@intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Harish Chegondi <harish.chegondi@intel.com>
Cc: Jacob Pan <jacob.jun.pan@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/20160222221012.845369524@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>

authored by

Thomas Gleixner and committed by
Ingo Molnar
9de8d686 8a6d2f8f

+86 -108
+86 -108
arch/x86/events/intel/rapl.c
··· 129 129 struct hrtimer hrtimer; 130 130 }; 131 131 132 + struct rapl_pmus { 133 + struct pmu pmu; 134 + unsigned int maxpkg; 135 + struct rapl_pmu *pmus[]; 136 + }; 137 + 132 138 /* 1/2^hw_unit Joule */ 133 139 static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly; 134 - static struct pmu rapl_pmu_class; 140 + static struct rapl_pmus *rapl_pmus; 135 141 static cpumask_t rapl_cpu_mask; 136 - static int rapl_cntr_mask; 142 + static unsigned int rapl_cntr_mask; 137 143 static u64 rapl_timer_ms; 138 144 139 - static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); 140 - static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); 145 + static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) 146 + { 147 + return rapl_pmus->pmus[topology_logical_package_id(cpu)]; 148 + } 141 149 142 150 static inline u64 rapl_read_counter(struct perf_event *event) 143 151 { ··· 325 317 326 318 static int rapl_pmu_event_init(struct perf_event *event) 327 319 { 328 - struct rapl_pmu *pmu = __this_cpu_read(rapl_pmu); 329 320 u64 cfg = event->attr.config & RAPL_EVENT_MASK; 330 321 int bit, msr, ret = 0; 322 + struct rapl_pmu *pmu; 331 323 332 324 /* only look at RAPL events */ 333 - if (event->attr.type != rapl_pmu_class.type) 325 + if (event->attr.type != rapl_pmus->pmu.type) 334 326 return -ENOENT; 335 327 336 328 /* check only supported bits are set */ ··· 378 370 return -EINVAL; 379 371 380 372 /* must be done before validate_group */ 373 + pmu = cpu_to_rapl_pmu(event->cpu); 381 374 event->cpu = pmu->cpu; 382 375 event->pmu_private = pmu; 383 376 event->hw.event_base = msr; ··· 511 502 NULL, 512 503 }; 513 504 514 - static struct pmu rapl_pmu_class = { 515 - .attr_groups = rapl_attr_groups, 516 - .task_ctx_nr = perf_invalid_context, /* system-wide only */ 517 - .event_init = rapl_pmu_event_init, 518 - .add = rapl_pmu_event_add, /* must have */ 519 - .del = rapl_pmu_event_del, /* must have */ 520 - .start = rapl_pmu_event_start, 521 - .stop = rapl_pmu_event_stop, 522 - .read = 
rapl_pmu_event_read, 523 - }; 524 - 525 505 static void rapl_cpu_exit(int cpu) 526 506 { 527 - struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); 528 - int i, phys_id = topology_physical_package_id(cpu); 529 - int target = -1; 507 + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); 508 + int target; 530 509 531 - /* find a new cpu on same package */ 532 - for_each_online_cpu(i) { 533 - if (i == cpu) 534 - continue; 535 - if (phys_id == topology_physical_package_id(i)) { 536 - target = i; 537 - break; 538 - } 539 - } 540 - /* 541 - * clear cpu from cpumask 542 - * if was set in cpumask and still some cpu on package, 543 - * then move to new cpu 544 - */ 545 - if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0) 510 + /* Check if exiting cpu is used for collecting rapl events */ 511 + if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask)) 512 + return; 513 + 514 + pmu->cpu = -1; 515 + /* Find a new cpu to collect rapl events */ 516 + target = cpumask_any_but(topology_core_cpumask(cpu), cpu); 517 + 518 + /* Migrate rapl events to the new target */ 519 + if (target < nr_cpu_ids) { 546 520 cpumask_set_cpu(target, &rapl_cpu_mask); 547 - 548 - WARN_ON(cpumask_empty(&rapl_cpu_mask)); 549 - /* 550 - * migrate events and context to new cpu 551 - */ 552 - if (target >= 0) 521 + pmu->cpu = target; 553 522 perf_pmu_migrate_context(pmu->pmu, cpu, target); 554 - 555 - /* cancel overflow polling timer for CPU */ 556 - hrtimer_cancel(&pmu->hrtimer); 523 + } 557 524 } 558 525 559 526 static void rapl_cpu_init(int cpu) 560 527 { 561 - int i, phys_id = topology_physical_package_id(cpu); 528 + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); 529 + int target; 562 530 563 - /* check if phys_is is already covered */ 564 - for_each_cpu(i, &rapl_cpu_mask) { 565 - if (phys_id == topology_physical_package_id(i)) 566 - return; 567 - } 568 - /* was not found, so add it */ 531 + /* 532 + * Check if there is an online cpu in the package which collects rapl 533 + * events already. 
534 + */ 535 + target = cpumask_any_and(&rapl_cpu_mask, topology_core_cpumask(cpu)); 536 + if (target < nr_cpu_ids) 537 + return; 538 + 569 539 cpumask_set_cpu(cpu, &rapl_cpu_mask); 540 + pmu->cpu = cpu; 570 541 } 571 542 572 543 static int rapl_cpu_prepare(int cpu) 573 544 { 574 - struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); 575 - int phys_id = topology_physical_package_id(cpu); 545 + struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); 576 546 577 547 if (pmu) 578 548 return 0; 579 549 580 - if (phys_id < 0) 581 - return -1; 582 - 583 550 pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); 584 551 if (!pmu) 585 - return -1; 552 + return -ENOMEM; 553 + 586 554 raw_spin_lock_init(&pmu->lock); 587 - 588 555 INIT_LIST_HEAD(&pmu->active_list); 589 - 590 - pmu->pmu = &rapl_pmu_class; 591 - pmu->cpu = cpu; 592 - 556 + pmu->pmu = &rapl_pmus->pmu; 593 557 pmu->timer_interval = ms_to_ktime(rapl_timer_ms); 594 - 558 + pmu->cpu = -1; 595 559 rapl_hrtimer_init(pmu); 596 - 597 - /* set RAPL pmu for this cpu for now */ 598 - per_cpu(rapl_pmu, cpu) = pmu; 599 - per_cpu(rapl_pmu_to_free, cpu) = NULL; 600 - 601 - return 0; 602 - } 603 - 604 - static void rapl_cpu_kfree(int cpu) 605 - { 606 - struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu); 607 - 608 - kfree(pmu); 609 - 610 - per_cpu(rapl_pmu_to_free, cpu) = NULL; 611 - } 612 - 613 - static int rapl_cpu_dying(int cpu) 614 - { 615 - struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); 616 - 617 - if (!pmu) 618 - return 0; 619 - 620 - per_cpu(rapl_pmu, cpu) = NULL; 621 - 622 - per_cpu(rapl_pmu_to_free, cpu) = pmu; 623 - 560 + rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; 624 561 return 0; 625 562 } 626 563 ··· 579 624 case CPU_UP_PREPARE: 580 625 rapl_cpu_prepare(cpu); 581 626 break; 582 - case CPU_STARTING: 627 + 628 + case CPU_DOWN_FAILED: 629 + case CPU_ONLINE: 583 630 rapl_cpu_init(cpu); 584 631 break; 585 - case CPU_UP_CANCELED: 586 - case CPU_DYING: 587 - rapl_cpu_dying(cpu); 588 - break; 589 - case CPU_ONLINE: 
590 - case CPU_DEAD: 591 - rapl_cpu_kfree(cpu); 592 - break; 632 + 593 633 case CPU_DOWN_PREPARE: 594 634 rapl_cpu_exit(cpu); 595 635 break; 596 - default: 597 - break; 598 636 } 599 - 600 637 return NOTIFY_OK; 601 638 } 602 639 ··· 650 703 651 704 static int __init rapl_prepare_cpus(void) 652 705 { 653 - unsigned int cpu; 706 + unsigned int cpu, pkg; 654 707 int ret; 655 708 656 709 for_each_online_cpu(cpu) { 710 + pkg = topology_logical_package_id(cpu); 711 + if (rapl_pmus->pmus[pkg]) 712 + continue; 713 + 657 714 ret = rapl_cpu_prepare(cpu); 658 715 if (ret) 659 716 return ret; ··· 668 717 669 718 static void __init cleanup_rapl_pmus(void) 670 719 { 671 - int cpu; 720 + int i; 672 721 673 - for_each_online_cpu(cpu) 674 - kfree(per_cpu(rapl_pmu, cpu)); 722 + for (i = 0; i < rapl_pmus->maxpkg; i++) 723 + kfree(rapl_pmus->pmus + i); 724 + kfree(rapl_pmus); 725 + } 726 + 727 + static int __init init_rapl_pmus(void) 728 + { 729 + int maxpkg = topology_max_packages(); 730 + size_t size; 731 + 732 + size = sizeof(*rapl_pmus) + maxpkg * sizeof(struct rapl_pmu *); 733 + rapl_pmus = kzalloc(size, GFP_KERNEL); 734 + if (!rapl_pmus) 735 + return -ENOMEM; 736 + 737 + rapl_pmus->maxpkg = maxpkg; 738 + rapl_pmus->pmu.attr_groups = rapl_attr_groups; 739 + rapl_pmus->pmu.task_ctx_nr = perf_invalid_context; 740 + rapl_pmus->pmu.event_init = rapl_pmu_event_init; 741 + rapl_pmus->pmu.add = rapl_pmu_event_add; 742 + rapl_pmus->pmu.del = rapl_pmu_event_del; 743 + rapl_pmus->pmu.start = rapl_pmu_event_start; 744 + rapl_pmus->pmu.stop = rapl_pmu_event_stop; 745 + rapl_pmus->pmu.read = rapl_pmu_event_read; 746 + return 0; 675 747 } 676 748 677 749 static const struct x86_cpu_id rapl_cpu_match[] __initconst = { ··· 745 771 if (ret) 746 772 return ret; 747 773 774 + ret = init_rapl_pmus(); 775 + if (ret) 776 + return ret; 777 + 748 778 cpu_notifier_register_begin(); 749 779 750 780 ret = rapl_prepare_cpus(); 751 781 if (ret) 752 782 goto out; 753 783 754 - ret = 
perf_pmu_register(&rapl_pmu_class, "power", -1); 784 + ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); 755 785 if (ret) 756 786 goto out; 757 787