Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Thomas Gleixner:
"A set of perf related fixes:

- fix a CR4.PCE propagation issue caused by using mm instead of
active_mm, which propagated the wrong value.

- perf core fixes, which plug a use-after-free issue and make the
event inheritance on fork more robust.

- a tooling fix for symbol handling"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf symbols: Fix symbols__fixup_end heuristic for corner cases
x86/perf: Clarify why x86_pmu_event_mapped() isn't racy
x86/perf: Fix CR4.PCE propagation to use active_mm instead of mm
perf/core: Better explain the inherit magic
perf/core: Simplify perf_event_free_task()
perf/core: Fix event inheritance on fork()
perf/core: Fix use-after-free in perf_release()

+63 -19
+14 -2
arch/x86/events/core.c
··· 2101 2101 2102 2102 static void refresh_pce(void *ignored) 2103 2103 { 2104 - if (current->mm) 2105 - load_mm_cr4(current->mm); 2104 + if (current->active_mm) 2105 + load_mm_cr4(current->active_mm); 2106 2106 } 2107 2107 2108 2108 static void x86_pmu_event_mapped(struct perf_event *event) 2109 2109 { 2110 2110 if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) 2111 2111 return; 2112 + 2113 + /* 2114 + * This function relies on not being called concurrently in two 2115 + * tasks in the same mm. Otherwise one task could observe 2116 + * perf_rdpmc_allowed > 1 and return all the way back to 2117 + * userspace with CR4.PCE clear while another task is still 2118 + * doing on_each_cpu_mask() to propagate CR4.PCE. 2119 + * 2120 + * For now, this can't happen because all callers hold mmap_sem 2121 + * for write. If this changes, we'll need a different solution. 2122 + */ 2123 + lockdep_assert_held_exclusive(&current->mm->mmap_sem); 2112 2124 2113 2125 if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1) 2114 2126 on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1);
+48 -16
kernel/events/core.c
··· 4256 4256 4257 4257 raw_spin_lock_irq(&ctx->lock); 4258 4258 /* 4259 - * Mark this even as STATE_DEAD, there is no external reference to it 4259 + * Mark this event as STATE_DEAD, there is no external reference to it 4260 4260 * anymore. 4261 4261 * 4262 4262 * Anybody acquiring event->child_mutex after the below loop _must_ ··· 10417 10417 continue; 10418 10418 10419 10419 mutex_lock(&ctx->mutex); 10420 - again: 10421 - list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, 10422 - group_entry) 10423 - perf_free_event(event, ctx); 10420 + raw_spin_lock_irq(&ctx->lock); 10421 + /* 10422 + * Destroy the task <-> ctx relation and mark the context dead. 10423 + * 10424 + * This is important because even though the task hasn't been 10425 + * exposed yet the context has been (through child_list). 10426 + */ 10427 + RCU_INIT_POINTER(task->perf_event_ctxp[ctxn], NULL); 10428 + WRITE_ONCE(ctx->task, TASK_TOMBSTONE); 10429 + put_task_struct(task); /* cannot be last */ 10430 + raw_spin_unlock_irq(&ctx->lock); 10424 10431 10425 - list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, 10426 - group_entry) 10432 + list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) 10427 10433 perf_free_event(event, ctx); 10428 - 10429 - if (!list_empty(&ctx->pinned_groups) || 10430 - !list_empty(&ctx->flexible_groups)) 10431 - goto again; 10432 10434 10433 10435 mutex_unlock(&ctx->mutex); 10434 - 10435 10436 put_ctx(ctx); 10436 10437 } 10437 10438 } ··· 10470 10469 } 10471 10470 10472 10471 /* 10473 - * inherit a event from parent task to child task: 10472 + * Inherit a event from parent task to child task. 10473 + * 10474 + * Returns: 10475 + * - valid pointer on success 10476 + * - NULL for orphaned events 10477 + * - IS_ERR() on error 10474 10478 */ 10475 10479 static struct perf_event * 10476 10480 inherit_event(struct perf_event *parent_event, ··· 10569 10563 return child_event; 10570 10564 } 10571 10565 10566 + /* 10567 + * Inherits an event group. 
10568 + * 10569 + * This will quietly suppress orphaned events; !inherit_event() is not an error. 10570 + * This matches with perf_event_release_kernel() removing all child events. 10571 + * 10572 + * Returns: 10573 + * - 0 on success 10574 + * - <0 on error 10575 + */ 10572 10576 static int inherit_group(struct perf_event *parent_event, 10573 10577 struct task_struct *parent, 10574 10578 struct perf_event_context *parent_ctx, ··· 10593 10577 child, NULL, child_ctx); 10594 10578 if (IS_ERR(leader)) 10595 10579 return PTR_ERR(leader); 10580 + /* 10581 + * @leader can be NULL here because of is_orphaned_event(). In this 10582 + * case inherit_event() will create individual events, similar to what 10583 + * perf_group_detach() would do anyway. 10584 + */ 10596 10585 list_for_each_entry(sub, &parent_event->sibling_list, group_entry) { 10597 10586 child_ctr = inherit_event(sub, parent, parent_ctx, 10598 10587 child, leader, child_ctx); ··· 10607 10586 return 0; 10608 10587 } 10609 10588 10589 + /* 10590 + * Creates the child task context and tries to inherit the event-group. 10591 + * 10592 + * Clears @inherited_all on !attr.inherited or error. Note that we'll leave 10593 + * inherited_all set when we 'fail' to inherit an orphaned event; this is 10594 + * consistent with perf_event_release_kernel() removing all child events. 10595 + * 10596 + * Returns: 10597 + * - 0 on success 10598 + * - <0 on error 10599 + */ 10610 10600 static int 10611 10601 inherit_task_group(struct perf_event *event, struct task_struct *parent, 10612 10602 struct perf_event_context *parent_ctx, ··· 10640 10608 * First allocate and initialize a context for the 10641 10609 * child. 
10642 10610 */ 10643 - 10644 10611 child_ctx = alloc_perf_context(parent_ctx->pmu, child); 10645 10612 if (!child_ctx) 10646 10613 return -ENOMEM; ··· 10701 10670 ret = inherit_task_group(event, parent, parent_ctx, 10702 10671 child, ctxn, &inherited_all); 10703 10672 if (ret) 10704 - break; 10673 + goto out_unlock; 10705 10674 } 10706 10675 10707 10676 /* ··· 10717 10686 ret = inherit_task_group(event, parent, parent_ctx, 10718 10687 child, ctxn, &inherited_all); 10719 10688 if (ret) 10720 - break; 10689 + goto out_unlock; 10721 10690 } 10722 10691 10723 10692 raw_spin_lock_irqsave(&parent_ctx->lock, flags); ··· 10745 10714 } 10746 10715 10747 10716 raw_spin_unlock_irqrestore(&parent_ctx->lock, flags); 10717 + out_unlock: 10748 10718 mutex_unlock(&parent_ctx->mutex); 10749 10719 10750 10720 perf_unpin_context(parent_ctx);
+1 -1
tools/perf/util/symbol.c
··· 202 202 203 203 /* Last entry */ 204 204 if (curr->end == curr->start) 205 - curr->end = roundup(curr->start, 4096); 205 + curr->end = roundup(curr->start, 4096) + 4096; 206 206 } 207 207 208 208 void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)