Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf: Fix perf_event_pmu_context serialization

Syzkaller triggered a WARN in put_pmu_ctx().

WARNING: CPU: 1 PID: 2245 at kernel/events/core.c:4925 put_pmu_ctx+0x1f0/0x278

This is because there is no locking around the read of epc->ctx in the
"if (!epc->ctx)" check in find_get_pmu_context(), nor around the point
where it is set to NULL in put_pmu_ctx().

The decrement of the reference count in put_pmu_ctx() also happens
outside of the spinlock. This makes the following interleaving possible,
in which put_pmu_ctx() clears the context even though its refcount has
already been raised back to non-zero:

CPU0                                            CPU1
find_get_pmu_context()
  if (!epc->ctx) == false
                                                put_pmu_ctx()
                                                  atomic_dec_and_test(&epc->refcount) == true
                                                  epc->refcount == 0
  atomic_inc(&epc->refcount);
  epc->refcount == 1
                                                  list_del_init(&epc->pmu_ctx_entry);
                                                  epc->ctx = NULL;

Another issue is that the WARN_ON for no active PMU events in
put_pmu_ctx() is outside of the lock. If the perf_event_pmu_context is
an embedded one, it is not deleted even after being cleared, so it can
be re-used and the warning can trigger. For this reason the WARN_ON
also needs to be moved inside the lock.

The above warning is very quick to trigger on Arm by running these two
commands at the same time:

while true; do perf record -- ls; done
while true; do perf record -- ls; done

[peterz: atomic_dec_and_raw_lock*()]
Fixes: bd2756811766 ("perf: Rewrite core context handling")
Reported-by: syzbot+697196bc0265049822bd@syzkaller.appspotmail.com
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20230127143141.1782804-2-james.clark@arm.com

authored by

James Clark and committed by
Peter Zijlstra
4f64a6c9 6d796c50

+57 -22
+9
include/linux/spinlock.h
··· 476 476 #define atomic_dec_and_lock_irqsave(atomic, lock, flags) \ 477 477 __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags))) 478 478 479 + extern int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock); 480 + #define atomic_dec_and_raw_lock(atomic, lock) \ 481 + __cond_lock(lock, _atomic_dec_and_raw_lock(atomic, lock)) 482 + 483 + extern int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock, 484 + unsigned long *flags); 485 + #define atomic_dec_and_raw_lock_irqsave(atomic, lock, flags) \ 486 + __cond_lock(lock, _atomic_dec_and_raw_lock_irqsave(atomic, lock, &(flags))) 487 + 479 488 int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, 480 489 size_t max_size, unsigned int cpu_mult, 481 490 gfp_t gfp, const char *name,
+17 -22
kernel/events/core.c
··· 4813 4813 4814 4814 cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu); 4815 4815 epc = &cpc->epc; 4816 - 4816 + raw_spin_lock_irq(&ctx->lock); 4817 4817 if (!epc->ctx) { 4818 4818 atomic_set(&epc->refcount, 1); 4819 4819 epc->embedded = 1; 4820 - raw_spin_lock_irq(&ctx->lock); 4821 4820 list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list); 4822 4821 epc->ctx = ctx; 4823 - raw_spin_unlock_irq(&ctx->lock); 4824 4822 } else { 4825 4823 WARN_ON_ONCE(epc->ctx != ctx); 4826 4824 atomic_inc(&epc->refcount); 4827 4825 } 4828 - 4826 + raw_spin_unlock_irq(&ctx->lock); 4829 4827 return epc; 4830 4828 } 4831 4829 ··· 4894 4896 4895 4897 static void put_pmu_ctx(struct perf_event_pmu_context *epc) 4896 4898 { 4899 + struct perf_event_context *ctx = epc->ctx; 4897 4900 unsigned long flags; 4898 4901 4899 - if (!atomic_dec_and_test(&epc->refcount)) 4902 + /* 4903 + * XXX 4904 + * 4905 + * lockdep_assert_held(&ctx->mutex); 4906 + * 4907 + * can't because of the call-site in _free_event()/put_event() 4908 + * which isn't always called under ctx->mutex. 4909 + */ 4910 + if (!atomic_dec_and_raw_lock_irqsave(&epc->refcount, &ctx->lock, flags)) 4900 4911 return; 4901 4912 4902 - if (epc->ctx) { 4903 - struct perf_event_context *ctx = epc->ctx; 4913 + WARN_ON_ONCE(list_empty(&epc->pmu_ctx_entry)); 4904 4914 4905 - /* 4906 - * XXX 4907 - * 4908 - * lockdep_assert_held(&ctx->mutex); 4909 - * 4910 - * can't because of the call-site in _free_event()/put_event() 4911 - * which isn't always called under ctx->mutex. 
4912 - */ 4913 - 4914 - WARN_ON_ONCE(list_empty(&epc->pmu_ctx_entry)); 4915 - raw_spin_lock_irqsave(&ctx->lock, flags); 4916 - list_del_init(&epc->pmu_ctx_entry); 4917 - epc->ctx = NULL; 4918 - raw_spin_unlock_irqrestore(&ctx->lock, flags); 4919 - } 4915 + list_del_init(&epc->pmu_ctx_entry); 4916 + epc->ctx = NULL; 4920 4917 4921 4918 WARN_ON_ONCE(!list_empty(&epc->pinned_active)); 4922 4919 WARN_ON_ONCE(!list_empty(&epc->flexible_active)); 4920 + 4921 + raw_spin_unlock_irqrestore(&ctx->lock, flags); 4923 4922 4924 4923 if (epc->embedded) 4925 4924 return;
+31
lib/dec_and_lock.c
··· 49 49 return 0; 50 50 } 51 51 EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave); 52 + 53 + int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock) 54 + { 55 + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ 56 + if (atomic_add_unless(atomic, -1, 1)) 57 + return 0; 58 + 59 + /* Otherwise do it the slow way */ 60 + raw_spin_lock(lock); 61 + if (atomic_dec_and_test(atomic)) 62 + return 1; 63 + raw_spin_unlock(lock); 64 + return 0; 65 + } 66 + EXPORT_SYMBOL(_atomic_dec_and_raw_lock); 67 + 68 + int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock, 69 + unsigned long *flags) 70 + { 71 + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ 72 + if (atomic_add_unless(atomic, -1, 1)) 73 + return 0; 74 + 75 + /* Otherwise do it the slow way */ 76 + raw_spin_lock_irqsave(lock, *flags); 77 + if (atomic_dec_and_test(atomic)) 78 + return 1; 79 + raw_spin_unlock_irqrestore(lock, *flags); 80 + return 0; 81 + } 82 + EXPORT_SYMBOL(_atomic_dec_and_raw_lock_irqsave);