perf/x86/lbr: Remove cpuc->lbr_xsave allocation from atomic context

If the kernel is compiled with the CONFIG_LOCKDEP option, the conditional
might_sleep_if() deep in kmem_cache_alloc() will generate the following
trace, and potentially cause a deadlock when another LBR event is added:

[] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:196
[] Call Trace:
[] kmem_cache_alloc+0x36/0x250
[] intel_pmu_lbr_add+0x152/0x170
[] x86_pmu_add+0x83/0xd0
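
For illustration, a minimal sketch of the check that fires, simplified from the slab
allocator's might_alloc()/might_sleep_if() debug machinery (not the exact kernel
source; might_alloc_sketch() is a made-up name used only for this sketch):

  /*
   * Simplified sketch: the allocator asserts that a blocking allocation
   * is only requested from a context that is allowed to sleep.
   */
  static inline void might_alloc_sketch(gfp_t gfp_mask)
  {
          might_sleep_if(gfpflags_allow_blocking(gfp_mask));
  }

  /*
   * Pre-patch call chain that trips it: x86_pmu_add() runs with
   * interrupts disabled from the perf event scheduling code, yet
   * intel_pmu_lbr_add() asked for a sleepable allocation:
   *
   *   x86_pmu_add()
   *     intel_pmu_lbr_add()
   *       kmem_cache_alloc(kmem_cache, GFP_KERNEL)
   */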

Move the lbr_xsave allocation out of intel_pmu_lbr_add() and into a new
reserve_lbr_buffers() helper that runs from the sleepable x86_reserve_hardware()
path, right next to reserve_ds_buffers(). This makes the allocation symmetric
with the existing release_lbr_buffers() call and mirrors how the DS buffers are
handled.
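
For reference, the existing release side that the new helper mirrors looks roughly
like this at this point in the tree (reproduced approximately, not verbatim; only
the kmem_cache_free()/NULL lines also appear as context in the lbr.c hunk below):

  void release_lbr_buffers(void)
  {
          struct kmem_cache *kmem_cache;
          struct cpu_hw_events *cpuc;
          int cpu;

          if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
                  return;

          for_each_possible_cpu(cpu) {
                  cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
                  kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
                  if (kmem_cache && cpuc->lbr_xsave) {
                          kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
                          cpuc->lbr_xsave = NULL;
                  }
          }
  }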

Fixes: c085fb8774 ("perf/x86/intel/lbr: Support XSAVES for arch LBR read")
Signed-off-by: Like Xu <like.xu@linux.intel.com>
[peterz: simplified]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Kan Liang <kan.liang@linux.intel.com>
Link: https://lkml.kernel.org/r/20210430052247.3079672-2-like.xu@linux.intel.com

Authored by Like Xu and committed by Peter Zijlstra · 488e13a4 · 3317c26a

Changed files: +30 -8

arch/x86/events/core.c  (+4 -2)
···
         if (!atomic_inc_not_zero(&pmc_refcount)) {
                 mutex_lock(&pmc_reserve_mutex);
                 if (atomic_read(&pmc_refcount) == 0) {
-                        if (!reserve_pmc_hardware())
+                        if (!reserve_pmc_hardware()) {
                                 err = -EBUSY;
-                        else
+                        } else {
                                 reserve_ds_buffers();
+                                reserve_lbr_buffers();
+                        }
                 }
                 if (!err)
                         atomic_inc(&pmc_refcount);
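
The release path in core.c is already symmetric; an approximate sketch of the
existing counterpart (not part of this patch, shown from memory for context):

  void x86_release_hardware(void)
  {
          if (atomic_dec_and_mutex_lock(&pmc_refcount, &pmc_reserve_mutex)) {
                  release_pmc_hardware();
                  release_ds_buffers();
                  release_lbr_buffers();
                  mutex_unlock(&pmc_reserve_mutex);
          }
  }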

arch/x86/events/intel/lbr.c  (+20 -6)
···
 
 void intel_pmu_lbr_add(struct perf_event *event)
 {
-        struct kmem_cache *kmem_cache = event->pmu->task_ctx_cache;
         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
         if (!x86_pmu.lbr_nr)
···
         perf_sched_cb_inc(event->ctx->pmu);
         if (!cpuc->lbr_users++ && !event->total_time_running)
                 intel_pmu_lbr_reset();
-
-        if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
-            kmem_cache && !cpuc->lbr_xsave &&
-            (cpuc->lbr_users != cpuc->lbr_pebs_users))
-                cpuc->lbr_xsave = kmem_cache_alloc(kmem_cache, GFP_KERNEL);
 }
 
 void release_lbr_buffers(void)
···
                         kmem_cache_free(kmem_cache, cpuc->lbr_xsave);
                         cpuc->lbr_xsave = NULL;
                 }
+        }
+}
+
+void reserve_lbr_buffers(void)
+{
+        struct kmem_cache *kmem_cache;
+        struct cpu_hw_events *cpuc;
+        int cpu;
+
+        if (!static_cpu_has(X86_FEATURE_ARCH_LBR))
+                return;
+
+        for_each_possible_cpu(cpu) {
+                cpuc = per_cpu_ptr(&cpu_hw_events, cpu);
+                kmem_cache = x86_get_pmu(cpu)->task_ctx_cache;
+                if (!kmem_cache || cpuc->lbr_xsave)
+                        continue;
+
+                cpuc->lbr_xsave = kmem_cache_alloc_node(kmem_cache, GFP_KERNEL,
+                                                        cpu_to_node(cpu));
         }
 }
 

arch/x86/events/perf_event.h  (+6)
···
 
 void release_lbr_buffers(void);
 
+void reserve_lbr_buffers(void);
+
 extern struct event_constraint bts_constraint;
 extern struct event_constraint vlbr_constraint;
···
 }
 
 static inline void release_lbr_buffers(void)
+{
+}
+
+static inline void reserve_lbr_buffers(void)
 {
 }
 