Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf/arm-ni: Consolidate CPU affinity handling

Since overflow interrupts from the individual PMUs are infrequent and
unlikely to coincide, and we make no attempt to balance them across
CPUs anyway, there's really not much point tracking a separate CPU
affinity per PMU. Move the CPU affinity and hotplug migration up to
the NI instance level.

Tested-by: Shouping Wang <allen.wang@hj-micro.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/00b622872006c2f0c89485e343b1cb8caaa79c47.1752256072.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>

Authored by Robin Murphy; committed by Will Deacon.
Commit 6a5dc6c7 (parent 0259de63)

Diffstat: +34 insertions, -40 deletions
drivers/perf/arm-ni.c
··· 104 104 u16 id; 105 105 int num_units; 106 106 int irq; 107 - int cpu; 108 - struct hlist_node cpuhp_node; 109 107 struct pmu pmu; 110 108 struct arm_ni_unit *units; 111 109 struct perf_event *evcnt[NI_NUM_COUNTERS]; ··· 115 117 void __iomem *base; 116 118 enum ni_part part; 117 119 int id; 120 + int cpu; 118 121 int num_cds; 122 + struct hlist_node cpuhp_node; 119 123 struct arm_ni_cd cds[] __counted_by(num_cds); 120 124 }; 121 125 122 126 #define cd_to_ni(cd) container_of((cd), struct arm_ni, cds[(cd)->id]) 123 127 #define pmu_to_cd(p) container_of((p), struct arm_ni_cd, pmu) 128 + 129 + #define ni_for_each_cd(n, c) \ 130 + for (struct arm_ni_cd *c = n->cds; c < n->cds + n->num_cds; c++) if (c->pmu_base) 124 131 125 132 #define cd_for_each_unit(cd, u) \ 126 133 for (struct arm_ni_unit *u = cd->units; u < cd->units + cd->num_units; u++) ··· 221 218 static ssize_t arm_ni_cpumask_show(struct device *dev, 222 219 struct device_attribute *attr, char *buf) 223 220 { 224 - struct arm_ni_cd *cd = pmu_to_cd(dev_get_drvdata(dev)); 221 + struct arm_ni *ni = cd_to_ni(pmu_to_cd(dev_get_drvdata(dev))); 225 222 226 - return cpumap_print_to_pagebuf(true, buf, cpumask_of(cd->cpu)); 223 + return cpumap_print_to_pagebuf(true, buf, cpumask_of(ni->cpu)); 227 224 } 228 225 229 226 static struct device_attribute arm_ni_cpumask_attr = ··· 317 314 if (is_sampling_event(event)) 318 315 return -EINVAL; 319 316 320 - event->cpu = cd->cpu; 317 + event->cpu = cd_to_ni(cd)->cpu; 321 318 if (NI_EVENT_TYPE(event) == NI_PMU) 322 319 return arm_ni_validate_group(event); 323 320 ··· 546 543 if (err) 547 544 return err; 548 545 549 - cd->cpu = cpumask_local_spread(0, dev_to_node(ni->dev)); 550 - irq_set_affinity(cd->irq, cpumask_of(cd->cpu)); 546 + irq_set_affinity(cd->irq, cpumask_of(ni->cpu)); 551 547 552 548 cd->pmu = (struct pmu) { 553 549 .module = THIS_MODULE, ··· 568 566 if (!name) 569 567 return -ENOMEM; 570 568 571 - err = cpuhp_state_add_instance_nocalls(arm_ni_hp_state, 
&cd->cpuhp_node); 572 - if (err) 573 - return err; 574 - 575 - err = perf_pmu_register(&cd->pmu, name, -1); 576 - if (err) 577 - cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node); 578 - 579 - return err; 569 + return perf_pmu_register(&cd->pmu, name, -1); 580 570 } 581 571 582 572 static void arm_ni_remove(struct platform_device *pdev) 583 573 { 584 574 struct arm_ni *ni = platform_get_drvdata(pdev); 585 575 586 - for (int i = 0; i < ni->num_cds; i++) { 587 - struct arm_ni_cd *cd = ni->cds + i; 588 - 589 - if (!cd->pmu_base) 590 - continue; 591 - 576 + ni_for_each_cd(ni, cd) { 592 577 writel_relaxed(0, cd->pmu_base + NI_PMCR); 593 578 writel_relaxed(U32_MAX, cd->pmu_base + NI_PMINTENCLR); 594 579 perf_pmu_unregister(&cd->pmu); 595 - cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &cd->cpuhp_node); 596 580 } 581 + cpuhp_state_remove_instance_nocalls(arm_ni_hp_state, &ni->cpuhp_node); 597 582 } 598 583 599 584 static void arm_ni_probe_domain(void __iomem *base, struct arm_ni_node *node) ··· 600 611 struct resource *res; 601 612 void __iomem *base; 602 613 static atomic_t id; 603 - int num_cds; 614 + int ret, num_cds; 604 615 u32 reg, part; 605 616 606 617 /* ··· 651 662 ni->num_cds = num_cds; 652 663 ni->part = part; 653 664 ni->id = atomic_fetch_inc(&id); 665 + ni->cpu = cpumask_local_spread(0, dev_to_node(ni->dev)); 654 666 platform_set_drvdata(pdev, ni); 667 + 668 + ret = cpuhp_state_add_instance_nocalls(arm_ni_hp_state, &ni->cpuhp_node); 669 + if (ret) 670 + return ret; 655 671 656 672 for (int v = 0; v < cfg.num_components; v++) { 657 673 reg = readl_relaxed(cfg.base + NI_CHILD_PTR(v)); ··· 665 671 reg = readl_relaxed(vd.base + NI_CHILD_PTR(p)); 666 672 arm_ni_probe_domain(base + reg, &pd); 667 673 for (int c = 0; c < pd.num_components; c++) { 668 - int ret; 669 - 670 674 reg = readl_relaxed(pd.base + NI_CHILD_PTR(c)); 671 675 arm_ni_probe_domain(base + reg, &cd); 672 676 ret = arm_ni_init_cd(ni, &cd, res->start); ··· 707 715 .remove = 
arm_ni_remove, 708 716 }; 709 717 710 - static void arm_ni_pmu_migrate(struct arm_ni_cd *cd, unsigned int cpu) 718 + static void arm_ni_pmu_migrate(struct arm_ni *ni, unsigned int cpu) 711 719 { 712 - perf_pmu_migrate_context(&cd->pmu, cd->cpu, cpu); 713 - irq_set_affinity(cd->irq, cpumask_of(cpu)); 714 - cd->cpu = cpu; 720 + ni_for_each_cd(ni, cd) { 721 + perf_pmu_migrate_context(&cd->pmu, ni->cpu, cpu); 722 + irq_set_affinity(cd->irq, cpumask_of(cpu)); 723 + } 724 + ni->cpu = cpu; 715 725 } 716 726 717 727 static int arm_ni_pmu_online_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) 718 728 { 719 - struct arm_ni_cd *cd; 729 + struct arm_ni *ni; 720 730 int node; 721 731 722 - cd = hlist_entry_safe(cpuhp_node, struct arm_ni_cd, cpuhp_node); 723 - node = dev_to_node(cd_to_ni(cd)->dev); 724 - if (cpu_to_node(cd->cpu) != node && cpu_to_node(cpu) == node) 725 - arm_ni_pmu_migrate(cd, cpu); 732 + ni = hlist_entry_safe(cpuhp_node, struct arm_ni, cpuhp_node); 733 + node = dev_to_node(ni->dev); 734 + if (cpu_to_node(ni->cpu) != node && cpu_to_node(cpu) == node) 735 + arm_ni_pmu_migrate(ni, cpu); 726 736 return 0; 727 737 } 728 738 729 739 static int arm_ni_pmu_offline_cpu(unsigned int cpu, struct hlist_node *cpuhp_node) 730 740 { 731 - struct arm_ni_cd *cd; 741 + struct arm_ni *ni; 732 742 unsigned int target; 733 743 int node; 734 744 735 - cd = hlist_entry_safe(cpuhp_node, struct arm_ni_cd, cpuhp_node); 736 - if (cpu != cd->cpu) 745 + ni = hlist_entry_safe(cpuhp_node, struct arm_ni, cpuhp_node); 746 + if (cpu != ni->cpu) 737 747 return 0; 738 748 739 - node = dev_to_node(cd_to_ni(cd)->dev); 749 + node = dev_to_node(ni->dev); 740 750 target = cpumask_any_and_but(cpumask_of_node(node), cpu_online_mask, cpu); 741 751 if (target >= nr_cpu_ids) 742 752 target = cpumask_any_but(cpu_online_mask, cpu); 743 753 744 754 if (target < nr_cpu_ids) 745 - arm_ni_pmu_migrate(cd, target); 755 + arm_ni_pmu_migrate(ni, target); 746 756 return 0; 747 757 } 748 758