perf: Fix duplicate events with multiple-pmu vs software events

Because the multi-pmu bits can share contexts between struct pmu
instances, we could get duplicate events when iterating the pmu list.
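
As a minimal sketch of the idea (user-space, illustrative names only, not
the kernel's code): two pmu instances share one cpu context, so a naive
walk of the pmu list would emit the same side-band event once per pmu;
nominating one pmu as the context's active_pmu and skipping the others
emits it exactly once.

/*
 * Toy model: several "pmu" instances can point at the same shared
 * "cpu context".  Only the context's active_pmu delivers side-band
 * events, so a shared context is visited exactly once.
 */
#include <stdio.h>

struct cpu_context {
        struct pmu *active_pmu;         /* canonical owner for side-band events */
        const char *name;
};

struct pmu {
        const char *name;
        struct cpu_context *ctx;        /* possibly shared with other pmus */
};

static void deliver_event(struct cpu_context *ctx)
{
        printf("side-band event delivered via %s\n", ctx->name);
}

int main(void)
{
        struct cpu_context shared = { .name = "swevent-ctx" };
        struct pmu sw_a = { "software",   &shared };
        struct pmu sw_b = { "tracepoint", &shared };    /* shares sw_a's context */
        struct pmu *pmus[] = { &sw_a, &sw_b };

        shared.active_pmu = &sw_a;      /* set once, when the context is created */

        for (unsigned int i = 0; i < 2; i++) {
                struct pmu *pmu = pmus[i];

                /* the fix: only the context's active_pmu emits the event */
                if (pmu->ctx->active_pmu != pmu)
                        continue;
                deliver_event(pmu->ctx);
        }
        return 0;
}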

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Authored by Peter Zijlstra and committed by Ingo Molnar (51676957, 6313e3c2)

+30 -6

include/linux/perf_event.h  (+1)

···
         int                     exclusive;
         struct list_head        rotation_list;
         int                     jiffies_interval;
+        struct pmu              *active_pmu;
 };
 
 struct perf_output_handle {
kernel/perf_event.c  (+29 -6)

···
         rcu_read_lock();
         list_for_each_entry_rcu(pmu, &pmus, entry) {
                 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+                if (cpuctx->active_pmu != pmu)
+                        goto next;
                 perf_event_task_ctx(&cpuctx->ctx, task_event);
 
                 ctx = task_event->task_ctx;
···
         rcu_read_lock();
         list_for_each_entry_rcu(pmu, &pmus, entry) {
                 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+                if (cpuctx->active_pmu != pmu)
+                        goto next;
                 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
 
                 ctxn = pmu->task_ctx_nr;
···
         rcu_read_lock();
         list_for_each_entry_rcu(pmu, &pmus, entry) {
                 cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+                if (cpuctx->active_pmu != pmu)
+                        goto next;
                 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
                                     vma->vm_flags & VM_EXEC);
 
···
         return NULL;
 }
 
-static void free_pmu_context(void * __percpu cpu_context)
+static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
 {
-        struct pmu *pmu;
+        int cpu;
+
+        for_each_possible_cpu(cpu) {
+                struct perf_cpu_context *cpuctx;
+
+                cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+
+                if (cpuctx->active_pmu == old_pmu)
+                        cpuctx->active_pmu = pmu;
+        }
+}
+
+static void free_pmu_context(struct pmu *pmu)
+{
+        struct pmu *i;
 
         mutex_lock(&pmus_lock);
         /*
          * Like a real lame refcount.
          */
-        list_for_each_entry(pmu, &pmus, entry) {
-                if (pmu->pmu_cpu_context == cpu_context)
+        list_for_each_entry(i, &pmus, entry) {
+                if (i->pmu_cpu_context == pmu->pmu_cpu_context) {
+                        update_pmu_context(i, pmu);
                         goto out;
+                }
         }
 
-        free_percpu(cpu_context);
+        free_percpu(pmu->pmu_cpu_context);
 out:
         mutex_unlock(&pmus_lock);
 }
···
                 cpuctx->ctx.pmu = pmu;
                 cpuctx->jiffies_interval = 1;
                 INIT_LIST_HEAD(&cpuctx->rotation_list);
+                cpuctx->active_pmu = pmu;
         }
 
 got_cpu_context:
···
         synchronize_rcu();
 
         free_percpu(pmu->pmu_disable_count);
-        free_pmu_context(pmu->pmu_cpu_context);
+        free_pmu_context(pmu);
 }
 
 struct pmu *perf_init_event(struct perf_event *event)
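
The unregister path needs the same care, which is what the
free_pmu_context()/update_pmu_context() change above handles: the shared
context may only be freed once no pmu uses it, and if the dying pmu was
the context's active_pmu, ownership has to move to a surviving sharer.
A stand-alone sketch of that half, again with illustrative names rather
than the kernel's:

/*
 * Toy model of the unregister half: the caller unlinks the dying pmu
 * from the list first; if another pmu still shares the context we only
 * hand over the active_pmu role, otherwise the context is really freed.
 */
#include <stdio.h>
#include <stdlib.h>

struct cpu_context {
        struct pmu *active_pmu;
};

struct pmu {
        struct cpu_context *ctx;
        struct pmu *next;               /* toy singly linked pmu list */
};

static struct pmu *pmu_list;

static void release_context(struct pmu *dying)
{
        struct pmu *p;

        for (p = pmu_list; p; p = p->next) {
                if (p->ctx != dying->ctx)
                        continue;
                /* another pmu still shares the context: transfer ownership */
                if (p->ctx->active_pmu == dying)
                        p->ctx->active_pmu = p;
                return;
        }
        /* no sharer left: the context can really be freed */
        free(dying->ctx);
        printf("context freed\n");
}

int main(void)
{
        struct cpu_context *shared = calloc(1, sizeof(*shared));
        struct pmu b = { .ctx = shared };
        struct pmu a = { .ctx = shared, .next = &b };

        if (!shared)
                return 1;

        pmu_list = &a;
        shared->active_pmu = &a;

        /* unregister a: b inherits ownership, the context stays alive */
        pmu_list = &b;
        release_context(&a);
        printf("owner handed over: %s\n", shared->active_pmu == &b ? "yes" : "no");

        /* unregister b: nothing shares the context any more, free it */
        pmu_list = NULL;
        release_context(&b);
        return 0;
}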