Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

powerpc/perf: Add thread IMC PMU support

Add support to register Thread In-Memory Collection PMU counters.
Patch adds thread IMC specific data structures, along with memory
init functions and CPU hotplug support.

Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
Signed-off-by: Hemant Kumar <hemant@linux.vnet.ibm.com>
Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>

Authored by Anju T Sudhakar and committed by Michael Ellerman.
f74c89bd 39a846db

+268 -5
+267 -5
arch/powerpc/perf/imc-pmu.c
··· 37 37 struct imc_pmu_ref *core_imc_refc; 38 38 static struct imc_pmu *core_imc_pmu; 39 39 40 + /* Thread IMC data structures and variables */ 41 + 42 + static DEFINE_PER_CPU(u64 *, thread_imc_mem); 43 + static struct imc_pmu *thread_imc_pmu; 44 + static int thread_imc_mem_size; 45 + 40 46 struct imc_pmu *imc_event_to_pmu(struct perf_event *event) 41 47 { 42 48 return container_of(event->pmu, struct imc_pmu, pmu); ··· 734 728 return 0; 735 729 } 736 730 731 + /* 732 + * Allocates a page of memory for each of the online cpus, and write the 733 + * physical base address of that page to the LDBAR for that cpu. 734 + * 735 + * LDBAR Register Layout: 736 + * 737 + * 0 4 8 12 16 20 24 28 738 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | 739 + * | | [ ] [ Counter Address [8:50] 740 + * | * Mode | 741 + * | * PB Scope 742 + * * Enable/Disable 743 + * 744 + * 32 36 40 44 48 52 56 60 745 + * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | 746 + * Counter Address [8:50] ] 747 + * 748 + */ 749 + static int thread_imc_mem_alloc(int cpu_id, int size) 750 + { 751 + u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id); 752 + int phys_id = topology_physical_package_id(cpu_id); 753 + 754 + if (!local_mem) { 755 + /* 756 + * This case could happen only once at start, since we dont 757 + * free the memory in cpu offline path. 
758 + */ 759 + local_mem = page_address(alloc_pages_node(phys_id, 760 + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, 761 + get_order(size))); 762 + if (!local_mem) 763 + return -ENOMEM; 764 + 765 + per_cpu(thread_imc_mem, cpu_id) = local_mem; 766 + } 767 + 768 + ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE; 769 + 770 + mtspr(SPRN_LDBAR, ldbar_value); 771 + return 0; 772 + } 773 + 774 + static int ppc_thread_imc_cpu_online(unsigned int cpu) 775 + { 776 + return thread_imc_mem_alloc(cpu, thread_imc_mem_size); 777 + } 778 + 779 + static int ppc_thread_imc_cpu_offline(unsigned int cpu) 780 + { 781 + mtspr(SPRN_LDBAR, 0); 782 + return 0; 783 + } 784 + 785 + static int thread_imc_cpu_init(void) 786 + { 787 + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, 788 + "perf/powerpc/imc_thread:online", 789 + ppc_thread_imc_cpu_online, 790 + ppc_thread_imc_cpu_offline); 791 + } 792 + 793 + void thread_imc_pmu_sched_task(struct perf_event_context *ctx, 794 + bool sched_in) 795 + { 796 + int core_id; 797 + struct imc_pmu_ref *ref; 798 + 799 + if (!is_core_imc_mem_inited(smp_processor_id())) 800 + return; 801 + 802 + core_id = smp_processor_id() / threads_per_core; 803 + /* 804 + * imc pmus are enabled only when it is used. 805 + * See if this is triggered for the first time. 806 + * If yes, take the mutex lock and enable the counters. 807 + * If not, just increment the count in ref count struct. 
808 + */ 809 + ref = &core_imc_refc[core_id]; 810 + if (!ref) 811 + return; 812 + 813 + if (sched_in) { 814 + mutex_lock(&ref->lock); 815 + if (ref->refc == 0) { 816 + if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, 817 + get_hard_smp_processor_id(smp_processor_id()))) { 818 + mutex_unlock(&ref->lock); 819 + pr_err("thread-imc: Unable to start the counter\ 820 + for core %d\n", core_id); 821 + return; 822 + } 823 + } 824 + ++ref->refc; 825 + mutex_unlock(&ref->lock); 826 + } else { 827 + mutex_lock(&ref->lock); 828 + ref->refc--; 829 + if (ref->refc == 0) { 830 + if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, 831 + get_hard_smp_processor_id(smp_processor_id()))) { 832 + mutex_unlock(&ref->lock); 833 + pr_err("thread-imc: Unable to stop the counters\ 834 + for core %d\n", core_id); 835 + return; 836 + } 837 + } else if (ref->refc < 0) { 838 + ref->refc = 0; 839 + } 840 + mutex_unlock(&ref->lock); 841 + } 842 + 843 + return; 844 + } 845 + 846 + static int thread_imc_event_init(struct perf_event *event) 847 + { 848 + u32 config = event->attr.config; 849 + struct task_struct *target; 850 + struct imc_pmu *pmu; 851 + 852 + if (event->attr.type != event->pmu->type) 853 + return -ENOENT; 854 + 855 + /* Sampling not supported */ 856 + if (event->hw.sample_period) 857 + return -EINVAL; 858 + 859 + event->hw.idx = -1; 860 + pmu = imc_event_to_pmu(event); 861 + 862 + /* Sanity check for config offset */ 863 + if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size)) 864 + return -EINVAL; 865 + 866 + target = event->hw.target; 867 + if (!target) 868 + return -EINVAL; 869 + 870 + event->pmu->task_ctx_nr = perf_sw_context; 871 + return 0; 872 + } 873 + 874 + static bool is_thread_imc_pmu(struct perf_event *event) 875 + { 876 + if (!strncmp(event->pmu->name, "thread_imc", strlen("thread_imc"))) 877 + return true; 878 + 879 + return false; 880 + } 881 + 737 882 static u64 * get_event_base_addr(struct perf_event *event) 738 883 { 739 - /* 740 - * Subsequent patch will 
add code to detect caller imc pmu 741 - * and return accordingly. 742 - */ 884 + u64 addr; 885 + 886 + if (is_thread_imc_pmu(event)) { 887 + addr = (u64)per_cpu(thread_imc_mem, smp_processor_id()); 888 + return (u64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK)); 889 + } 890 + 743 891 return (u64 *)event->hw.event_base; 892 + } 893 + 894 + static void thread_imc_pmu_start_txn(struct pmu *pmu, 895 + unsigned int txn_flags) 896 + { 897 + if (txn_flags & ~PERF_PMU_TXN_ADD) 898 + return; 899 + perf_pmu_disable(pmu); 900 + } 901 + 902 + static void thread_imc_pmu_cancel_txn(struct pmu *pmu) 903 + { 904 + perf_pmu_enable(pmu); 905 + } 906 + 907 + static int thread_imc_pmu_commit_txn(struct pmu *pmu) 908 + { 909 + perf_pmu_enable(pmu); 910 + return 0; 744 911 } 745 912 746 913 static u64 imc_read_counter(struct perf_event *event) ··· 973 794 return 0; 974 795 } 975 796 797 + static int thread_imc_event_add(struct perf_event *event, int flags) 798 + { 799 + if (flags & PERF_EF_START) 800 + imc_event_start(event, flags); 801 + 802 + /* Enable the sched_task to start the engine */ 803 + perf_sched_cb_inc(event->ctx->pmu); 804 + return 0; 805 + } 806 + 807 + static void thread_imc_event_del(struct perf_event *event, int flags) 808 + { 809 + /* 810 + * Take a snapshot and calculate the delta and update 811 + * the event counter values. 
812 + */ 813 + imc_event_update(event); 814 + perf_sched_cb_dec(event->ctx->pmu); 815 + } 816 + 976 817 /* update_pmu_ops : Populate the appropriate operations for "pmu" */ 977 818 static int update_pmu_ops(struct imc_pmu *pmu) 978 819 { ··· 1013 814 case IMC_DOMAIN_CORE: 1014 815 pmu->pmu.event_init = core_imc_event_init; 1015 816 pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group; 817 + break; 818 + case IMC_DOMAIN_THREAD: 819 + pmu->pmu.event_init = thread_imc_event_init; 820 + pmu->pmu.sched_task = thread_imc_pmu_sched_task; 821 + pmu->pmu.add = thread_imc_event_add; 822 + pmu->pmu.del = thread_imc_event_del; 823 + pmu->pmu.start_txn = thread_imc_pmu_start_txn; 824 + pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn; 825 + pmu->pmu.commit_txn = thread_imc_pmu_commit_txn; 1016 826 break; 1017 827 default: 1018 828 break; ··· 1090 882 kfree(core_imc_refc); 1091 883 } 1092 884 885 + static void thread_imc_ldbar_disable(void *dummy) 886 + { 887 + /* 888 + * By Zeroing LDBAR, we disable thread-imc 889 + * updates. 890 + */ 891 + mtspr(SPRN_LDBAR, 0); 892 + } 893 + 894 + void thread_imc_disable(void) 895 + { 896 + on_each_cpu(thread_imc_ldbar_disable, NULL, 1); 897 + } 898 + 899 + static void cleanup_all_thread_imc_memory(void) 900 + { 901 + int i, order = get_order(thread_imc_mem_size); 902 + 903 + for_each_online_cpu(i) { 904 + if (per_cpu(thread_imc_mem, i)) 905 + free_pages((u64)per_cpu(thread_imc_mem, i), order); 906 + 907 + } 908 + } 909 + 1093 910 /* 1094 911 * Common function to unregister cpu hotplug callback and 1095 912 * free the memory. 
··· 1141 908 cleanup_all_core_imc_memory(); 1142 909 } 1143 910 911 + /* Free thread_imc memory */ 912 + if (pmu_ptr->domain == IMC_DOMAIN_THREAD) { 913 + cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE); 914 + cleanup_all_thread_imc_memory(); 915 + } 916 + 1144 917 /* Only free the attr_groups which are dynamically allocated */ 1145 918 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); 1146 919 kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); ··· 1162 923 int pmu_index) 1163 924 { 1164 925 const char *s; 1165 - int nr_cores; 926 + int nr_cores, cpu, res; 1166 927 1167 928 if (of_property_read_string(parent, "name", &s)) 1168 929 return -ENODEV; ··· 1197 958 return -ENOMEM; 1198 959 1199 960 core_imc_pmu = pmu_ptr; 961 + break; 962 + case IMC_DOMAIN_THREAD: 963 + /* Update the pmu name */ 964 + pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc"); 965 + if (!pmu_ptr->pmu.name) 966 + return -ENOMEM; 967 + 968 + thread_imc_mem_size = pmu_ptr->counter_mem_size; 969 + for_each_online_cpu(cpu) { 970 + res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size); 971 + if (res) 972 + return res; 973 + } 974 + 975 + thread_imc_pmu = pmu_ptr; 1200 976 break; 1201 977 default: 1202 978 return -EINVAL; ··· 1267 1013 ret = core_imc_pmu_cpumask_init(); 1268 1014 if (ret) { 1269 1015 cleanup_all_core_imc_memory(); 1016 + return ret; 1017 + } 1018 + 1019 + break; 1020 + case IMC_DOMAIN_THREAD: 1021 + ret = thread_imc_cpu_init(); 1022 + if (ret) { 1023 + cleanup_all_thread_imc_memory(); 1270 1024 return ret; 1271 1025 } 1272 1026
+1
include/linux/cpuhotplug.h
··· 141 141 CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, 142 142 CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, 143 143 CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, 144 + CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, 144 145 CPUHP_AP_WORKQUEUE_ONLINE, 145 146 CPUHP_AP_RCUTREE_ONLINE, 146 147 CPUHP_AP_ONLINE_DYN,