perf/core: Fix data race between perf_event_set_output() and perf_mmap_close()

Yang Jihong reported a race between perf_event_set_output() and
perf_mmap_close():

  CPU1                                  CPU2

  perf_mmap_close(e2)
    if (atomic_dec_and_test(&e2->rb->mmap_count)) // 1 -> 0
      detach_rest = true

                                        ioctl(e1, IOC_SET_OUTPUT, e2)
                                          perf_event_set_output(e1, e2)

    ...
    list_for_each_entry_rcu(e, &e2->rb->event_list, rb_entry)
      ring_buffer_attach(e, NULL);
      // e1 isn't yet added and
      // therefore not detached

                                            ring_buffer_attach(e1, e2->rb)
                                              list_add_rcu(&e1->rb_entry,
                                                           &e2->rb->event_list)

After this, e1 is attached to an unmapped rb and a subsequent
perf_mmap() will loop forever more:

  again:
	mutex_lock(&event->mmap_mutex);
	if (event->rb) {
		...
		if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
			...
			mutex_unlock(&event->mmap_mutex);
			goto again;
		}
	}

The loop in perf_mmap_close() holds e2->mmap_mutex, while the attach
in perf_event_set_output() holds e1->mmap_mutex. As such there is no
serialization to avoid this race.
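
For illustration (not part of the original report), a minimal user-space
sketch of the two operations whose kernel paths race is below; the event
type, buffer size and the perf_event_open() wrapper are arbitrary choices,
error handling is omitted, and actually hitting the window requires running
the munmap() and the ioctl() concurrently (two threads) in a tight loop:

/*
 * Illustration only: shows the two user-visible calls involved in the
 * race.  In this single-threaded form they merely execute back to back.
 */
#include <linux/perf_event.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	size_t len = sysconf(_SC_PAGESIZE) * (1 + 8); /* 1 meta + 2^n data pages */
	int e1, e2;
	void *rb;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;

	e1 = perf_event_open(&attr, 0, -1, -1, 0);
	e2 = perf_event_open(&attr, 0, -1, -1, 0);

	/* give e2 a ring buffer; its final unmap runs perf_mmap_close() */
	rb = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, e2, 0);

	/* CPU2 in the diagram: redirect e1 into e2's rb ... */
	ioctl(e1, PERF_EVENT_IOC_SET_OUTPUT, e2);

	/* CPU1 in the diagram: ... while that rb is being torn down */
	munmap(rb, len);

	/* if the race is lost, a later mmap() of e1 can spin in the loop above */
	close(e1);
	close(e2);
	return 0;
}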

Change perf_event_set_output() to take both e1->mmap_mutex and
e2->mmap_mutex to alleviate that problem. Additionally, have the loop
in perf_mmap() detach the rb directly; this avoids having to wait for
the concurrent perf_mmap_close() to get around to doing it to make
progress.
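
Taking the mmap_mutex of two different events raises the usual ABBA
concern: two concurrent SET_OUTPUT calls with swapped arguments must not
deadlock, which is why the mutexes are taken in address order (see
mutex_lock_double() in the patch below). A rough user-space analogue of
that locking pattern, with hypothetical helper names and for illustration
only:

#include <pthread.h>

/*
 * Always take the lower-addressed mutex first, so lock_pair(a, b) and
 * lock_pair(b, a) agree on the order and cannot ABBA-deadlock.
 */
static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (b < a) {
		pthread_mutex_t *tmp = a;
		a = b;
		b = tmp;
	}
	pthread_mutex_lock(a);
	pthread_mutex_lock(b);
}

static void unlock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
{
	pthread_mutex_unlock(a);
	pthread_mutex_unlock(b);
}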

Fixes: 9bb5d40cd93c ("perf: Fix mmap() accounting hole")
Reported-by: Yang Jihong <yangjihong1@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Yang Jihong <yangjihong1@huawei.com>
Link: https://lkml.kernel.org/r/YsQ3jm2GR38SW7uD@worktop.programming.kicks-ass.net

Changed files (+31 -14):
kernel/events/core.c
···
 
 		if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
 			/*
-			 * Raced against perf_mmap_close() through
-			 * perf_event_set_output(). Try again, hope for better
-			 * luck.
+			 * Raced against perf_mmap_close(); remove the
+			 * event and try again.
 			 */
+			ring_buffer_attach(event, NULL);
 			mutex_unlock(&event->mmap_mutex);
 			goto again;
 		}
···
 	goto out;
 }
 
+static void mutex_lock_double(struct mutex *a, struct mutex *b)
+{
+	if (b < a)
+		swap(a, b);
+
+	mutex_lock(a);
+	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+}
+
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
 	struct perf_buffer *rb = NULL;
 	int ret = -EINVAL;
 
-	if (!output_event)
+	if (!output_event) {
+		mutex_lock(&event->mmap_mutex);
 		goto set;
+	}
 
 	/* don't allow circular references */
 	if (event == output_event)
···
 	    event->pmu != output_event->pmu)
 		goto out;
 
+	/*
+	 * Hold both mmap_mutex to serialize against perf_mmap_close(). Since
+	 * output_event is already on rb->event_list, and the list iteration
+	 * restarts after every removal, it is guaranteed this new event is
+	 * observed *OR* if output_event is already removed, it's guaranteed we
+	 * observe !rb->mmap_count.
+	 */
+	mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex);
 set:
-	mutex_lock(&event->mmap_mutex);
 	/* Can't redirect output if we've got an active mmap() */
 	if (atomic_read(&event->mmap_count))
 		goto unlock;
···
 		rb = ring_buffer_get(output_event);
 		if (!rb)
 			goto unlock;
+
+		/* did we race against perf_mmap_close() */
+		if (!atomic_read(&rb->mmap_count)) {
+			ring_buffer_put(rb);
+			goto unlock;
+		}
 	}
 
 	ring_buffer_attach(event, rb);
···
 	ret = 0;
 unlock:
 	mutex_unlock(&event->mmap_mutex);
+	if (output_event)
+		mutex_unlock(&output_event->mmap_mutex);
 
 out:
 	return ret;
 }
 
-static void mutex_lock_double(struct mutex *a, struct mutex *b)
-{
-	if (b < a)
-		swap(a, b);
-
-	mutex_lock(a);
-	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
-}
-
 static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)