Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-urgent-2026-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:

- Fix a PMU driver crash on AMD EPYC systems, caused by
a race condition in x86_pmu_enable()

- Fix a possible counter-initialization bug in x86_pmu_enable()

- Fix a counter inheritance bug in inherit_event() and
__perf_event_read()

- Fix an Intel PMU driver branch constraints handling bug
found by UBSAN

- Fix a snoop information parsing bug in the Intel PMU driver's
new Off-Module Response (OMR) support code for Diamond Rapids /
Nova Lake

* tag 'perf-urgent-2026-03-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/intel: Fix OMR snoop information parsing issues
perf/x86/intel: Add missing branch counters constraint apply
perf: Make sure to use pmu_ctx->pmu for groups
x86/perf: Make sure to program the counter value for stopped events on migration
perf/x86: Move event pointer setup earlier in x86_pmu_enable()

+41 -27
+5 -2
arch/x86/events/core.c
@@ -1372,14 +1372,17 @@
 		else if (i < n_running)
 			continue;
 
-		if (hwc->state & PERF_HES_ARCH)
+		cpuc->events[hwc->idx] = event;
+
+		if (hwc->state & PERF_HES_ARCH) {
+			static_call(x86_pmu_set_period)(event);
 			continue;
+		}
 
 		/*
 		 * if cpuc->enabled = 0, then no wrmsr as
 		 * per x86_pmu_enable_event()
 		 */
-		cpuc->events[hwc->idx] = event;
 		x86_pmu_start(event, PERF_EF_RELOAD);
 	}
 	cpuc->n_added = 0;
+21 -10
arch/x86/events/intel/core.c
@@ -4628,6 +4628,19 @@
 	event->hw.dyn_constraint &= hybrid(event->pmu, acr_cause_mask64);
 }
 
+static inline int intel_set_branch_counter_constr(struct perf_event *event,
+						  int *num)
+{
+	if (branch_sample_call_stack(event))
+		return -EINVAL;
+	if (branch_sample_counters(event)) {
+		(*num)++;
+		event->hw.dyn_constraint &= x86_pmu.lbr_counters;
+	}
+
+	return 0;
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
 	int ret = x86_pmu_hw_config(event);
@@ -4711,21 +4724,19 @@
 	 * group, which requires the extra space to store the counters.
 	 */
 	leader = event->group_leader;
-	if (branch_sample_call_stack(leader))
+	if (intel_set_branch_counter_constr(leader, &num))
 		return -EINVAL;
-	if (branch_sample_counters(leader)) {
-		num++;
-		leader->hw.dyn_constraint &= x86_pmu.lbr_counters;
-	}
 	leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS;
 
 	for_each_sibling_event(sibling, leader) {
-		if (branch_sample_call_stack(sibling))
+		if (intel_set_branch_counter_constr(sibling, &num))
 			return -EINVAL;
-		if (branch_sample_counters(sibling)) {
-			num++;
-			sibling->hw.dyn_constraint &= x86_pmu.lbr_counters;
-		}
+	}
+
+	/* event isn't installed as a sibling yet. */
+	if (event != leader) {
+		if (intel_set_branch_counter_constr(event, &num))
+			return -EINVAL;
 	}
 
 	if (num > fls(x86_pmu.lbr_counters))
+7 -4
arch/x86/events/intel/ds.c
@@ -345,12 +345,12 @@
 	if (omr.omr_remote)
 		val |= REM;
 
-	val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT);
-
 	if (omr.omr_source == 0x2) {
-		u8 snoop = omr.omr_snoop | omr.omr_promoted;
+		u8 snoop = omr.omr_snoop | (omr.omr_promoted << 1);
 
-		if (snoop == 0x0)
+		if (omr.omr_hitm)
+			val |= P(SNOOP, HITM);
+		else if (snoop == 0x0)
 			val |= P(SNOOP, NA);
 		else if (snoop == 0x1)
 			val |= P(SNOOP, MISS);
@@ -359,7 +359,10 @@
 		else if (snoop == 0x3)
 			val |= P(SNOOP, NONE);
 	} else if (omr.omr_source > 0x2 && omr.omr_source < 0x7) {
+		val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT);
 		val |= omr.omr_snoop ? P(SNOOPX, FWD) : 0;
+	} else {
+		val |= P(SNOOP, NONE);
 	}
 
 	return val;
+8 -11
kernel/events/core.c
@@ -4813,7 +4813,7 @@
 	struct perf_event *sub, *event = data->event;
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
-	struct pmu *pmu = event->pmu;
+	struct pmu *pmu;
 
 	/*
 	 * If this is a task context, we need to check whether it is
@@ -4825,7 +4825,7 @@
 	if (ctx->task && cpuctx->task_ctx != ctx)
 		return;
 
-	raw_spin_lock(&ctx->lock);
+	guard(raw_spinlock)(&ctx->lock);
 	ctx_time_update_event(ctx, event);
 
 	perf_event_update_time(event);
@@ -4833,25 +4833,22 @@
 	perf_event_update_sibling_time(event);
 
 	if (event->state != PERF_EVENT_STATE_ACTIVE)
-		goto unlock;
+		return;
 
 	if (!data->group) {
-		pmu->read(event);
+		perf_pmu_read(event);
 		data->ret = 0;
-		goto unlock;
+		return;
 	}
 
+	pmu = event->pmu_ctx->pmu;
 	pmu->start_txn(pmu, PERF_PMU_TXN_READ);
 
-	pmu->read(event);
-
+	perf_pmu_read(event);
 	for_each_sibling_event(sub, event)
 		perf_pmu_read(sub);
 
 	data->ret = pmu->commit_txn(pmu);
-
-unlock:
-	raw_spin_unlock(&ctx->lock);
 }
 
 static inline u64 perf_event_count(struct perf_event *event, bool self)
@@ -14741,7 +14744,7 @@
 	get_ctx(child_ctx);
 	child_event->ctx = child_ctx;
 
-	pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event);
+	pmu_ctx = find_get_pmu_context(parent_event->pmu_ctx->pmu, child_ctx, child_event);
 	if (IS_ERR(pmu_ctx)) {
 		free_event(child_event);
 		return ERR_CAST(pmu_ctx);