Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'perf-core-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull performance events updates from Ingo Molnar:

- Add branch stack counters ABI extension to better capture the growing
amount of information the PMU exposes via branch stack sampling.
There's matching tooling support.

- Fix race when creating the nr_addr_filters sysfs file

- Add Intel Sierra Forest and Grand Ridge intel/cstate PMU support

- Add Intel Granite Rapids, Sierra Forest and Grand Ridge uncore PMU
support

- Misc cleanups & fixes

* tag 'perf-core-2024-01-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/intel/uncore: Factor out topology_gidnid_map()
perf/x86/intel/uncore: Fix NULL pointer dereference issue in upi_fill_topology()
perf/x86/amd: Reject branch stack for IBS events
perf/x86/intel/uncore: Support Sierra Forest and Grand Ridge
perf/x86/intel/uncore: Support IIO free-running counters on GNR
perf/x86/intel/uncore: Support Granite Rapids
perf/x86/uncore: Use u64 to replace unsigned for the uncore offsets array
perf/x86/intel/uncore: Generic uncore_get_uncores and MMIO format of SPR
perf: Fix the nr_addr_filters fix
perf/x86/intel/cstate: Add Grand Ridge support
perf/x86/intel/cstate: Add Sierra Forest support
x86/smp: Export symbol cpu_clustergroup_mask()
perf/x86/intel/cstate: Cleanup duplicate attr_groups
perf/core: Fix narrow startup race when creating the perf nr_addr_filters sysfs file
perf/x86/intel: Support branch counters logging
perf/x86/intel: Reorganize attrs and is_visible
perf: Add branch_sample_call_stack
perf/x86: Add PERF_X86_EVENT_NEEDS_BRANCH_STACK flag
perf: Add branch stack counters

+627 -130
+6
Documentation/ABI/testing/sysfs-bus-event_source-devices-caps
··· 16 16 Example output in powerpc: 17 17 grep . /sys/bus/event_source/devices/cpu/caps/* 18 18 /sys/bus/event_source/devices/cpu/caps/pmu_name:POWER9 19 + 20 + The "branch_counter_nr" in the supported platform exposes the 21 + maximum number of counters which can be shown in the u64 counters 22 + of PERF_SAMPLE_BRANCH_COUNTERS, while the "branch_counter_width" 23 + exposes the width of each counter. Both of them can be used by 24 + the perf tool to parse the logged counters in each branch.
+1 -1
arch/powerpc/perf/core-book3s.c
··· 2312 2312 struct cpu_hw_events *cpuhw; 2313 2313 cpuhw = this_cpu_ptr(&cpu_hw_events); 2314 2314 power_pmu_bhrb_read(event, cpuhw); 2315 - perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack); 2315 + perf_sample_save_brstack(&data, event, &cpuhw->bhrb_stack, NULL); 2316 2316 } 2317 2317 2318 2318 if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
+1 -1
arch/x86/events/amd/core.c
··· 940 940 continue; 941 941 942 942 if (has_branch_stack(event)) 943 - perf_sample_save_brstack(&data, event, &cpuc->lbr_stack); 943 + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); 944 944 945 945 if (perf_event_overflow(event, &data, regs)) 946 946 x86_pmu_stop(event, 0);
+3
arch/x86/events/amd/ibs.c
··· 287 287 if (config & ~perf_ibs->config_mask) 288 288 return -EINVAL; 289 289 290 + if (has_branch_stack(event)) 291 + return -EOPNOTSUPP; 292 + 290 293 ret = validate_group(event); 291 294 if (ret) 292 295 return ret;
+2 -2
arch/x86/events/core.c
··· 601 601 } 602 602 } 603 603 604 - if (event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK) 604 + if (branch_sample_call_stack(event)) 605 605 event->attach_state |= PERF_ATTACH_TASK_DATA; 606 606 607 607 /* ··· 1702 1702 perf_sample_data_init(&data, 0, event->hw.last_period); 1703 1703 1704 1704 if (has_branch_stack(event)) 1705 - perf_sample_save_brstack(&data, event, &cpuc->lbr_stack); 1705 + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); 1706 1706 1707 1707 if (perf_event_overflow(event, &data, regs)) 1708 1708 x86_pmu_stop(event, 0);
+122 -23
arch/x86/events/intel/core.c
··· 2527 2527 perf_report_aux_output_id(event, idx); 2528 2528 } 2529 2529 2530 + static __always_inline bool intel_pmu_needs_branch_stack(struct perf_event *event) 2531 + { 2532 + return event->hw.flags & PERF_X86_EVENT_NEEDS_BRANCH_STACK; 2533 + } 2534 + 2530 2535 static void intel_pmu_del_event(struct perf_event *event) 2531 2536 { 2532 - if (needs_branch_stack(event)) 2537 + if (intel_pmu_needs_branch_stack(event)) 2533 2538 intel_pmu_lbr_del(event); 2534 2539 if (event->attr.precise_ip) 2535 2540 intel_pmu_pebs_del(event); ··· 2792 2787 2793 2788 static void intel_pmu_enable_event(struct perf_event *event) 2794 2789 { 2790 + u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE; 2795 2791 struct hw_perf_event *hwc = &event->hw; 2796 2792 int idx = hwc->idx; 2797 2793 ··· 2801 2795 2802 2796 switch (idx) { 2803 2797 case 0 ... INTEL_PMC_IDX_FIXED - 1: 2798 + if (branch_sample_counters(event)) 2799 + enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR; 2804 2800 intel_set_masks(event, idx); 2805 - __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); 2801 + __x86_pmu_enable_event(hwc, enable_mask); 2806 2802 break; 2807 2803 case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1: 2808 2804 case INTEL_PMC_IDX_METRIC_BASE ... 
INTEL_PMC_IDX_METRIC_END: ··· 2828 2820 { 2829 2821 if (event->attr.precise_ip) 2830 2822 intel_pmu_pebs_add(event); 2831 - if (needs_branch_stack(event)) 2823 + if (intel_pmu_needs_branch_stack(event)) 2832 2824 intel_pmu_lbr_add(event); 2833 2825 } 2834 2826 ··· 3055 3047 perf_sample_data_init(&data, 0, event->hw.last_period); 3056 3048 3057 3049 if (has_branch_stack(event)) 3058 - perf_sample_save_brstack(&data, event, &cpuc->lbr_stack); 3050 + intel_pmu_lbr_save_brstack(&data, cpuc, event); 3059 3051 3060 3052 if (perf_event_overflow(event, &data, regs)) 3061 3053 x86_pmu_stop(event, 0); ··· 3620 3612 if (cpuc->excl_cntrs) 3621 3613 return intel_get_excl_constraints(cpuc, event, idx, c2); 3622 3614 3615 + /* Not all counters support the branch counter feature. */ 3616 + if (branch_sample_counters(event)) { 3617 + c2 = dyn_constraint(cpuc, c2, idx); 3618 + c2->idxmsk64 &= x86_pmu.lbr_counters; 3619 + c2->weight = hweight64(c2->idxmsk64); 3620 + } 3621 + 3623 3622 return c2; 3624 3623 } 3625 3624 ··· 3912 3897 x86_pmu.pebs_aliases(event); 3913 3898 } 3914 3899 3915 - if (needs_branch_stack(event)) { 3900 + if (needs_branch_stack(event) && is_sampling_event(event)) 3901 + event->hw.flags |= PERF_X86_EVENT_NEEDS_BRANCH_STACK; 3902 + 3903 + if (branch_sample_counters(event)) { 3904 + struct perf_event *leader, *sibling; 3905 + int num = 0; 3906 + 3907 + if (!(x86_pmu.flags & PMU_FL_BR_CNTR) || 3908 + (event->attr.config & ~INTEL_ARCH_EVENT_MASK)) 3909 + return -EINVAL; 3910 + 3911 + /* 3912 + * The branch counter logging is not supported in the call stack 3913 + * mode yet, since we cannot simply flush the LBR during e.g., 3914 + * multiplexing. Also, there is no obvious usage with the call 3915 + * stack mode. Simply forbids it for now. 3916 + * 3917 + * If any events in the group enable the branch counter logging 3918 + * feature, the group is treated as a branch counter logging 3919 + * group, which requires the extra space to store the counters. 
3920 + */ 3921 + leader = event->group_leader; 3922 + if (branch_sample_call_stack(leader)) 3923 + return -EINVAL; 3924 + if (branch_sample_counters(leader)) 3925 + num++; 3926 + leader->hw.flags |= PERF_X86_EVENT_BRANCH_COUNTERS; 3927 + 3928 + for_each_sibling_event(sibling, leader) { 3929 + if (branch_sample_call_stack(sibling)) 3930 + return -EINVAL; 3931 + if (branch_sample_counters(sibling)) 3932 + num++; 3933 + } 3934 + 3935 + if (num > fls(x86_pmu.lbr_counters)) 3936 + return -EINVAL; 3937 + /* 3938 + * Only applying the PERF_SAMPLE_BRANCH_COUNTERS doesn't 3939 + * require any branch stack setup. 3940 + * Clear the bit to avoid unnecessary branch stack setup. 3941 + */ 3942 + if (0 == (event->attr.branch_sample_type & 3943 + ~(PERF_SAMPLE_BRANCH_PLM_ALL | 3944 + PERF_SAMPLE_BRANCH_COUNTERS))) 3945 + event->hw.flags &= ~PERF_X86_EVENT_NEEDS_BRANCH_STACK; 3946 + 3947 + /* 3948 + * Force the leader to be a LBR event. So LBRs can be reset 3949 + * with the leader event. See intel_pmu_lbr_del() for details. 3950 + */ 3951 + if (!intel_pmu_needs_branch_stack(leader)) 3952 + return -EINVAL; 3953 + } 3954 + 3955 + if (intel_pmu_needs_branch_stack(event)) { 3916 3956 ret = intel_pmu_setup_lbr_filter(event); 3917 3957 if (ret) 3918 3958 return ret; ··· 4450 4380 */ 4451 4381 if (event->attr.precise_ip == 3) { 4452 4382 /* Force instruction:ppp on PMC0, 1 and Fixed counter 0 */ 4453 - if (constraint_match(&fixed0_constraint, event->hw.config)) 4454 - return &fixed0_counter0_1_constraint; 4383 + if (constraint_match(&fixed0_constraint, event->hw.config)) { 4384 + /* The fixed counter 0 doesn't support LBR event logging. 
*/ 4385 + if (branch_sample_counters(event)) 4386 + return &counter0_1_constraint; 4387 + else 4388 + return &fixed0_counter0_1_constraint; 4389 + } 4455 4390 4456 4391 switch (c->idxmsk64 & 0x3ull) { 4457 4392 case 0x1: ··· 4635 4560 goto err; 4636 4561 } 4637 4562 4638 - if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) { 4563 + if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA | PMU_FL_BR_CNTR)) { 4639 4564 size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint); 4640 4565 4641 4566 cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); ··· 5607 5532 5608 5533 static DEVICE_ATTR_RO(branches); 5609 5534 5535 + static ssize_t branch_counter_nr_show(struct device *cdev, 5536 + struct device_attribute *attr, 5537 + char *buf) 5538 + { 5539 + return snprintf(buf, PAGE_SIZE, "%d\n", fls(x86_pmu.lbr_counters)); 5540 + } 5541 + 5542 + static DEVICE_ATTR_RO(branch_counter_nr); 5543 + 5544 + static ssize_t branch_counter_width_show(struct device *cdev, 5545 + struct device_attribute *attr, 5546 + char *buf) 5547 + { 5548 + return snprintf(buf, PAGE_SIZE, "%d\n", LBR_INFO_BR_CNTR_BITS); 5549 + } 5550 + 5551 + static DEVICE_ATTR_RO(branch_counter_width); 5552 + 5610 5553 static struct attribute *lbr_attrs[] = { 5611 5554 &dev_attr_branches.attr, 5555 + &dev_attr_branch_counter_nr.attr, 5556 + &dev_attr_branch_counter_width.attr, 5612 5557 NULL 5613 5558 }; 5559 + 5560 + static umode_t 5561 + lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i) 5562 + { 5563 + /* branches */ 5564 + if (i == 0) 5565 + return x86_pmu.lbr_nr ? attr->mode : 0; 5566 + 5567 + return (x86_pmu.flags & PMU_FL_BR_CNTR) ? attr->mode : 0; 5568 + } 5614 5569 5615 5570 static char pmu_name_str[30]; 5616 5571 ··· 5669 5564 }; 5670 5565 5671 5566 static umode_t 5567 + default_is_visible(struct kobject *kobj, struct attribute *attr, int i) 5568 + { 5569 + if (attr == &dev_attr_allow_tsx_force_abort.attr) 5570 + return x86_pmu.flags & PMU_FL_TFA ? 
attr->mode : 0; 5571 + 5572 + return attr->mode; 5573 + } 5574 + 5575 + static umode_t 5672 5576 tsx_is_visible(struct kobject *kobj, struct attribute *attr, int i) 5673 5577 { 5674 5578 return boot_cpu_has(X86_FEATURE_RTM) ? attr->mode : 0; ··· 5699 5585 } 5700 5586 5701 5587 static umode_t 5702 - lbr_is_visible(struct kobject *kobj, struct attribute *attr, int i) 5703 - { 5704 - return x86_pmu.lbr_nr ? attr->mode : 0; 5705 - } 5706 - 5707 - static umode_t 5708 5588 exra_is_visible(struct kobject *kobj, struct attribute *attr, int i) 5709 5589 { 5710 5590 return x86_pmu.version >= 2 ? attr->mode : 0; 5711 - } 5712 - 5713 - static umode_t 5714 - default_is_visible(struct kobject *kobj, struct attribute *attr, int i) 5715 - { 5716 - if (attr == &dev_attr_allow_tsx_force_abort.attr) 5717 - return x86_pmu.flags & PMU_FL_TFA ? attr->mode : 0; 5718 - 5719 - return attr->mode; 5720 5591 } 5721 5592 5722 5593 static struct attribute_group group_events_td = {
+127 -35
arch/x86/events/intel/cstate.c
··· 41 41 * MSR_CORE_C1_RES: CORE C1 Residency Counter 42 42 * perf code: 0x00 43 43 * Available model: SLM,AMT,GLM,CNL,ICX,TNT,ADL,RPL 44 - * MTL 44 + * MTL,SRF,GRR 45 45 * Scope: Core (each processor core has a MSR) 46 46 * MSR_CORE_C3_RESIDENCY: CORE C3 Residency Counter 47 47 * perf code: 0x01 ··· 52 52 * perf code: 0x02 53 53 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, 54 54 * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, 55 - * TGL,TNT,RKL,ADL,RPL,SPR,MTL 55 + * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF, 56 + * GRR 56 57 * Scope: Core 57 58 * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter 58 59 * perf code: 0x03 ··· 76 75 * perf code: 0x02 77 76 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW, 78 77 * SKL,KNL,GLM,CNL,KBL,CML,ICL,ICX, 79 - * TGL,TNT,RKL,ADL,RPL,SPR,MTL 78 + * TGL,TNT,RKL,ADL,RPL,SPR,MTL,SRF 80 79 * Scope: Package (physical package) 81 80 * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. 82 81 * perf code: 0x03 ··· 98 97 * Available model: HSW ULT,KBL,GLM,CNL,CML,ICL,TGL, 99 98 * TNT,RKL,ADL,RPL,MTL 100 99 * Scope: Package (physical package) 100 + * MSR_MODULE_C6_RES_MS: Module C6 Residency Counter. 101 + * perf code: 0x00 102 + * Available model: SRF,GRR 103 + * Scope: A cluster of cores shared L2 cache 101 104 * 102 105 */ 103 106 ··· 135 130 struct cstate_model { 136 131 unsigned long core_events; 137 132 unsigned long pkg_events; 133 + unsigned long module_events; 138 134 unsigned long quirks; 139 135 }; 140 136 ··· 195 189 * "events" group (with empty attrs) before updating 196 190 * it with detected events. 
197 191 */ 198 - static struct attribute_group core_events_attr_group = { 192 + static struct attribute_group cstate_events_attr_group = { 199 193 .name = "events", 200 194 .attrs = attrs_empty, 201 195 }; 202 196 203 - DEFINE_CSTATE_FORMAT_ATTR(core_event, event, "config:0-63"); 204 - static struct attribute *core_format_attrs[] = { 205 - &format_attr_core_event.attr, 197 + DEFINE_CSTATE_FORMAT_ATTR(cstate_event, event, "config:0-63"); 198 + static struct attribute *cstate_format_attrs[] = { 199 + &format_attr_cstate_event.attr, 206 200 NULL, 207 201 }; 208 202 209 - static struct attribute_group core_format_attr_group = { 203 + static struct attribute_group cstate_format_attr_group = { 210 204 .name = "format", 211 - .attrs = core_format_attrs, 205 + .attrs = cstate_format_attrs, 212 206 }; 213 207 214 208 static cpumask_t cstate_core_cpu_mask; ··· 223 217 .attrs = cstate_cpumask_attrs, 224 218 }; 225 219 226 - static const struct attribute_group *core_attr_groups[] = { 227 - &core_events_attr_group, 228 - &core_format_attr_group, 220 + static const struct attribute_group *cstate_attr_groups[] = { 221 + &cstate_events_attr_group, 222 + &cstate_format_attr_group, 229 223 &cpumask_attr_group, 230 224 NULL, 231 225 }; ··· 274 268 [PERF_CSTATE_PKG_C10_RES] = { MSR_PKG_C10_RESIDENCY, &group_cstate_pkg_c10, test_msr }, 275 269 }; 276 270 277 - static struct attribute_group pkg_events_attr_group = { 278 - .name = "events", 279 - .attrs = attrs_empty, 280 - }; 281 - 282 - DEFINE_CSTATE_FORMAT_ATTR(pkg_event, event, "config:0-63"); 283 - static struct attribute *pkg_format_attrs[] = { 284 - &format_attr_pkg_event.attr, 285 - NULL, 286 - }; 287 - static struct attribute_group pkg_format_attr_group = { 288 - .name = "format", 289 - .attrs = pkg_format_attrs, 290 - }; 291 - 292 271 static cpumask_t cstate_pkg_cpu_mask; 293 272 294 - static const struct attribute_group *pkg_attr_groups[] = { 295 - &pkg_events_attr_group, 296 - &pkg_format_attr_group, 297 - 
&cpumask_attr_group, 298 - NULL, 273 + /* cstate_module PMU */ 274 + static struct pmu cstate_module_pmu; 275 + static bool has_cstate_module; 276 + 277 + enum perf_cstate_module_events { 278 + PERF_CSTATE_MODULE_C6_RES = 0, 279 + 280 + PERF_CSTATE_MODULE_EVENT_MAX, 299 281 }; 282 + 283 + PMU_EVENT_ATTR_STRING(c6-residency, attr_cstate_module_c6, "event=0x00"); 284 + 285 + static unsigned long module_msr_mask; 286 + 287 + PMU_EVENT_GROUP(events, cstate_module_c6); 288 + 289 + static struct perf_msr module_msr[] = { 290 + [PERF_CSTATE_MODULE_C6_RES] = { MSR_MODULE_C6_RES_MS, &group_cstate_module_c6, test_msr }, 291 + }; 292 + 293 + static cpumask_t cstate_module_cpu_mask; 300 294 301 295 static ssize_t cstate_get_attr_cpumask(struct device *dev, 302 296 struct device_attribute *attr, ··· 308 302 return cpumap_print_to_pagebuf(true, buf, &cstate_core_cpu_mask); 309 303 else if (pmu == &cstate_pkg_pmu) 310 304 return cpumap_print_to_pagebuf(true, buf, &cstate_pkg_cpu_mask); 305 + else if (pmu == &cstate_module_pmu) 306 + return cpumap_print_to_pagebuf(true, buf, &cstate_module_cpu_mask); 311 307 else 312 308 return 0; 313 309 } ··· 350 342 event->hw.event_base = pkg_msr[cfg].msr; 351 343 cpu = cpumask_any_and(&cstate_pkg_cpu_mask, 352 344 topology_die_cpumask(event->cpu)); 345 + } else if (event->pmu == &cstate_module_pmu) { 346 + if (cfg >= PERF_CSTATE_MODULE_EVENT_MAX) 347 + return -EINVAL; 348 + cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_MODULE_EVENT_MAX); 349 + if (!(module_msr_mask & (1 << cfg))) 350 + return -EINVAL; 351 + event->hw.event_base = module_msr[cfg].msr; 352 + cpu = cpumask_any_and(&cstate_module_cpu_mask, 353 + topology_cluster_cpumask(event->cpu)); 353 354 } else { 354 355 return -ENOENT; 355 356 } ··· 446 429 perf_pmu_migrate_context(&cstate_pkg_pmu, cpu, target); 447 430 } 448 431 } 432 + 433 + if (has_cstate_module && 434 + cpumask_test_and_clear_cpu(cpu, &cstate_module_cpu_mask)) { 435 + 436 + target = 
cpumask_any_but(topology_cluster_cpumask(cpu), cpu); 437 + /* Migrate events if there is a valid target */ 438 + if (target < nr_cpu_ids) { 439 + cpumask_set_cpu(target, &cstate_module_cpu_mask); 440 + perf_pmu_migrate_context(&cstate_module_pmu, cpu, target); 441 + } 442 + } 449 443 return 0; 450 444 } 451 445 ··· 483 455 if (has_cstate_pkg && target >= nr_cpu_ids) 484 456 cpumask_set_cpu(cpu, &cstate_pkg_cpu_mask); 485 457 458 + /* 459 + * If this is the first online thread of that cluster, set it 460 + * in the cluster cpu mask as the designated reader. 461 + */ 462 + target = cpumask_any_and(&cstate_module_cpu_mask, 463 + topology_cluster_cpumask(cpu)); 464 + if (has_cstate_module && target >= nr_cpu_ids) 465 + cpumask_set_cpu(cpu, &cstate_module_cpu_mask); 466 + 486 467 return 0; 487 468 } 488 469 ··· 514 477 NULL, 515 478 }; 516 479 480 + static const struct attribute_group *module_attr_update[] = { 481 + &group_cstate_module_c6, 482 + NULL 483 + }; 484 + 517 485 static struct pmu cstate_core_pmu = { 518 - .attr_groups = core_attr_groups, 486 + .attr_groups = cstate_attr_groups, 519 487 .attr_update = core_attr_update, 520 488 .name = "cstate_core", 521 489 .task_ctx_nr = perf_invalid_context, ··· 535 493 }; 536 494 537 495 static struct pmu cstate_pkg_pmu = { 538 - .attr_groups = pkg_attr_groups, 496 + .attr_groups = cstate_attr_groups, 539 497 .attr_update = pkg_attr_update, 540 498 .name = "cstate_pkg", 499 + .task_ctx_nr = perf_invalid_context, 500 + .event_init = cstate_pmu_event_init, 501 + .add = cstate_pmu_event_add, 502 + .del = cstate_pmu_event_del, 503 + .start = cstate_pmu_event_start, 504 + .stop = cstate_pmu_event_stop, 505 + .read = cstate_pmu_event_update, 506 + .capabilities = PERF_PMU_CAP_NO_INTERRUPT | PERF_PMU_CAP_NO_EXCLUDE, 507 + .module = THIS_MODULE, 508 + }; 509 + 510 + static struct pmu cstate_module_pmu = { 511 + .attr_groups = cstate_attr_groups, 512 + .attr_update = module_attr_update, 513 + .name = "cstate_module", 541 514 
.task_ctx_nr = perf_invalid_context, 542 515 .event_init = cstate_pmu_event_init, 543 516 .add = cstate_pmu_event_add, ··· 678 621 BIT(PERF_CSTATE_PKG_C10_RES), 679 622 }; 680 623 624 + static const struct cstate_model grr_cstates __initconst = { 625 + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | 626 + BIT(PERF_CSTATE_CORE_C6_RES), 627 + 628 + .module_events = BIT(PERF_CSTATE_MODULE_C6_RES), 629 + }; 630 + 631 + static const struct cstate_model srf_cstates __initconst = { 632 + .core_events = BIT(PERF_CSTATE_CORE_C1_RES) | 633 + BIT(PERF_CSTATE_CORE_C6_RES), 634 + 635 + .pkg_events = BIT(PERF_CSTATE_PKG_C6_RES), 636 + 637 + .module_events = BIT(PERF_CSTATE_MODULE_C6_RES), 638 + }; 639 + 681 640 682 641 static const struct x86_cpu_id intel_cstates_match[] __initconst = { 683 642 X86_MATCH_INTEL_FAM6_MODEL(NEHALEM, &nhm_cstates), ··· 746 673 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &glm_cstates), 747 674 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &glm_cstates), 748 675 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_cstates), 676 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, &srf_cstates), 677 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT, &grr_cstates), 749 678 750 679 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates), 751 680 X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates), ··· 789 714 pkg_msr_mask = perf_msr_probe(pkg_msr, PERF_CSTATE_PKG_EVENT_MAX, 790 715 true, (void *) &cm->pkg_events); 791 716 717 + module_msr_mask = perf_msr_probe(module_msr, PERF_CSTATE_MODULE_EVENT_MAX, 718 + true, (void *) &cm->module_events); 719 + 792 720 has_cstate_core = !!core_msr_mask; 793 721 has_cstate_pkg = !!pkg_msr_mask; 722 + has_cstate_module = !!module_msr_mask; 794 723 795 - return (has_cstate_core || has_cstate_pkg) ? 0 : -ENODEV; 724 + return (has_cstate_core || has_cstate_pkg || has_cstate_module) ? 
0 : -ENODEV; 796 725 } 797 726 798 727 static inline void cstate_cleanup(void) ··· 809 730 810 731 if (has_cstate_pkg) 811 732 perf_pmu_unregister(&cstate_pkg_pmu); 733 + 734 + if (has_cstate_module) 735 + perf_pmu_unregister(&cstate_module_pmu); 812 736 } 813 737 814 738 static int __init cstate_init(void) ··· 844 762 if (err) { 845 763 has_cstate_pkg = false; 846 764 pr_info("Failed to register cstate pkg pmu\n"); 765 + cstate_cleanup(); 766 + return err; 767 + } 768 + } 769 + 770 + if (has_cstate_module) { 771 + err = perf_pmu_register(&cstate_module_pmu, cstate_module_pmu.name, -1); 772 + if (err) { 773 + has_cstate_module = false; 774 + pr_info("Failed to register cstate cluster pmu\n"); 847 775 cstate_cleanup(); 848 776 return err; 849 777 }
+2 -2
arch/x86/events/intel/ds.c
··· 1755 1755 setup_pebs_time(event, data, pebs->tsc); 1756 1756 1757 1757 if (has_branch_stack(event)) 1758 - perf_sample_save_brstack(data, event, &cpuc->lbr_stack); 1758 + perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL); 1759 1759 } 1760 1760 1761 1761 static void adaptive_pebs_save_regs(struct pt_regs *regs, ··· 1912 1912 1913 1913 if (has_branch_stack(event)) { 1914 1914 intel_pmu_store_pebs_lbrs(lbr); 1915 - perf_sample_save_brstack(data, event, &cpuc->lbr_stack); 1915 + intel_pmu_lbr_save_brstack(data, cpuc, event); 1916 1916 } 1917 1917 } 1918 1918
+84 -1
arch/x86/events/intel/lbr.c
··· 676 676 WARN_ON_ONCE(cpuc->lbr_users < 0); 677 677 WARN_ON_ONCE(cpuc->lbr_pebs_users < 0); 678 678 perf_sched_cb_dec(event->pmu); 679 + 680 + /* 681 + * The logged occurrences information is only valid for the 682 + * current LBR group. If another LBR group is scheduled in 683 + * later, the information from the stale LBRs will be wrongly 684 + * interpreted. Reset the LBRs here. 685 + * 686 + * Only clear once for a branch counter group with the leader 687 + * event. Because 688 + * - Cannot simply reset the LBRs with the !cpuc->lbr_users. 689 + * Because it's possible that the last LBR user is not in a 690 + * branch counter group, e.g., a branch_counters group + 691 + * several normal LBR events. 692 + * - The LBR reset can be done with any one of the events in a 693 + * branch counter group, since they are always scheduled together. 694 + * It's easy to force the leader event an LBR event. 695 + */ 696 + if (is_branch_counters_group(event) && event == event->group_leader) 697 + intel_pmu_lbr_reset(); 679 698 } 680 699 681 700 static inline bool vlbr_exclude_host(void) ··· 885 866 return cycles; 886 867 } 887 868 869 + static_assert((64 - PERF_BRANCH_ENTRY_INFO_BITS_MAX) > LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS); 870 + 888 871 static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc, 889 872 struct lbr_entry *entries) 890 873 { ··· 919 898 e->abort = !!(info & LBR_INFO_ABORT); 920 899 e->cycles = get_lbr_cycles(info); 921 900 e->type = get_lbr_br_type(info); 901 + 902 + /* 903 + * Leverage the reserved field of cpuc->lbr_entries[i] to 904 + * temporarily store the branch counters information. 905 + * The later code will decide what content can be disclosed 906 + * to the perf tool. Please see intel_pmu_lbr_counters_reorder(). 
907 + */ 908 + e->reserved = (info >> LBR_INFO_BR_CNTR_OFFSET) & LBR_INFO_BR_CNTR_FULL_MASK; 922 909 } 923 910 924 911 cpuc->lbr_stack.nr = i; 912 + } 913 + 914 + /* 915 + * The enabled order may be different from the counter order. 916 + * Update the lbr_counters with the enabled order. 917 + */ 918 + static void intel_pmu_lbr_counters_reorder(struct cpu_hw_events *cpuc, 919 + struct perf_event *event) 920 + { 921 + int i, j, pos = 0, order[X86_PMC_IDX_MAX]; 922 + struct perf_event *leader, *sibling; 923 + u64 src, dst, cnt; 924 + 925 + leader = event->group_leader; 926 + if (branch_sample_counters(leader)) 927 + order[pos++] = leader->hw.idx; 928 + 929 + for_each_sibling_event(sibling, leader) { 930 + if (!branch_sample_counters(sibling)) 931 + continue; 932 + order[pos++] = sibling->hw.idx; 933 + } 934 + 935 + WARN_ON_ONCE(!pos); 936 + 937 + for (i = 0; i < cpuc->lbr_stack.nr; i++) { 938 + src = cpuc->lbr_entries[i].reserved; 939 + dst = 0; 940 + for (j = 0; j < pos; j++) { 941 + cnt = (src >> (order[j] * LBR_INFO_BR_CNTR_BITS)) & LBR_INFO_BR_CNTR_MASK; 942 + dst |= cnt << j * LBR_INFO_BR_CNTR_BITS; 943 + } 944 + cpuc->lbr_counters[i] = dst; 945 + cpuc->lbr_entries[i].reserved = 0; 946 + } 947 + } 948 + 949 + void intel_pmu_lbr_save_brstack(struct perf_sample_data *data, 950 + struct cpu_hw_events *cpuc, 951 + struct perf_event *event) 952 + { 953 + if (is_branch_counters_group(event)) { 954 + intel_pmu_lbr_counters_reorder(cpuc, event); 955 + perf_sample_save_brstack(data, event, &cpuc->lbr_stack, cpuc->lbr_counters); 956 + return; 957 + } 958 + 959 + perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL); 925 960 } 926 961 927 962 static void intel_pmu_arch_lbr_read(struct cpu_hw_events *cpuc) ··· 1250 1173 for (i = 0; i < cpuc->lbr_stack.nr; ) { 1251 1174 if (!cpuc->lbr_entries[i].from) { 1252 1175 j = i; 1253 - while (++j < cpuc->lbr_stack.nr) 1176 + while (++j < cpuc->lbr_stack.nr) { 1254 1177 cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j]; 1178 + 
cpuc->lbr_counters[j-1] = cpuc->lbr_counters[j]; 1179 + } 1255 1180 cpuc->lbr_stack.nr--; 1256 1181 if (!cpuc->lbr_entries[i].from) 1257 1182 continue; ··· 1604 1525 x86_pmu.lbr_mispred = ecx.split.lbr_mispred; 1605 1526 x86_pmu.lbr_timed_lbr = ecx.split.lbr_timed_lbr; 1606 1527 x86_pmu.lbr_br_type = ecx.split.lbr_br_type; 1528 + x86_pmu.lbr_counters = ecx.split.lbr_counters; 1607 1529 x86_pmu.lbr_nr = lbr_nr; 1530 + 1531 + if (!!x86_pmu.lbr_counters) 1532 + x86_pmu.flags |= PMU_FL_BR_CNTR; 1608 1533 1609 1534 if (x86_pmu.lbr_mispred) 1610 1535 static_branch_enable(&x86_lbr_mispred);
+12
arch/x86/events/intel/uncore.c
··· 1814 1814 .uncore_units_ignore = spr_uncore_units_ignore, 1815 1815 }; 1816 1816 1817 + static const struct intel_uncore_init_fun gnr_uncore_init __initconst = { 1818 + .cpu_init = gnr_uncore_cpu_init, 1819 + .pci_init = gnr_uncore_pci_init, 1820 + .mmio_init = gnr_uncore_mmio_init, 1821 + .use_discovery = true, 1822 + .uncore_units_ignore = gnr_uncore_units_ignore, 1823 + }; 1824 + 1817 1825 static const struct intel_uncore_init_fun generic_uncore_init __initconst = { 1818 1826 .cpu_init = intel_uncore_generic_uncore_cpu_init, 1819 1827 .pci_init = intel_uncore_generic_uncore_pci_init, ··· 1873 1865 X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, &mtl_uncore_init), 1874 1866 X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), 1875 1867 X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init), 1868 + X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X, &gnr_uncore_init), 1869 + X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D, &gnr_uncore_init), 1876 1870 X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), 1877 1871 X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_uncore_init), 1872 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X, &gnr_uncore_init), 1873 + X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT, &gnr_uncore_init), 1878 1874 {}, 1879 1875 }; 1880 1876 MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match);
+7 -3
arch/x86/events/intel/uncore.h
··· 72 72 unsigned single_fixed:1; 73 73 unsigned pair_ctr_ctl:1; 74 74 union { 75 - unsigned *msr_offsets; 76 - unsigned *pci_offsets; 77 - unsigned *mmio_offsets; 75 + u64 *msr_offsets; 76 + u64 *pci_offsets; 77 + u64 *mmio_offsets; 78 78 }; 79 79 unsigned *box_ids; 80 80 struct event_constraint unconstrainted; ··· 593 593 extern struct pci_extra_dev *uncore_extra_pci_dev; 594 594 extern struct event_constraint uncore_constraint_empty; 595 595 extern int spr_uncore_units_ignore[]; 596 + extern int gnr_uncore_units_ignore[]; 596 597 597 598 /* uncore_snb.c */ 598 599 int snb_uncore_pci_init(void); ··· 635 634 int spr_uncore_pci_init(void); 636 635 void spr_uncore_cpu_init(void); 637 636 void spr_uncore_mmio_init(void); 637 + int gnr_uncore_pci_init(void); 638 + void gnr_uncore_cpu_init(void); 639 + void gnr_uncore_mmio_init(void); 638 640 639 641 /* uncore_nhmex.c */ 640 642 void nhmex_uncore_cpu_init(void);
+3 -2
arch/x86/events/intel/uncore_discovery.c
··· 125 125 int die, bool parsed) 126 126 { 127 127 struct intel_uncore_discovery_type *type; 128 - unsigned int *box_offset, *ids; 128 + unsigned int *ids; 129 + u64 *box_offset; 129 130 int i; 130 131 131 132 if (!unit->ctl || !unit->ctl_offset || !unit->ctr_offset) { ··· 154 153 if (!type) 155 154 return; 156 155 157 - box_offset = kcalloc(type->num_boxes + 1, sizeof(unsigned int), GFP_KERNEL); 156 + box_offset = kcalloc(type->num_boxes + 1, sizeof(u64), GFP_KERNEL); 158 157 if (!box_offset) 159 158 return; 160 159
+1 -1
arch/x86/events/intel/uncore_discovery.h
··· 125 125 u8 ctr_offset; /* Counter 0 offset */ 126 126 u16 num_boxes; /* number of boxes for the uncore block */ 127 127 unsigned int *ids; /* Box IDs */ 128 - unsigned int *box_offset; /* Box offset */ 128 + u64 *box_offset; /* Box offset */ 129 129 }; 130 130 131 131 bool intel_uncore_has_discovery_tables(int *ignore);
+1 -1
arch/x86/events/intel/uncore_nhmex.c
··· 306 306 }; 307 307 308 308 /* msr offset for each instance of cbox */ 309 - static unsigned nhmex_cbox_msr_offsets[] = { 309 + static u64 nhmex_cbox_msr_offsets[] = { 310 310 0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, 0x240, 0x2c0, 311 311 }; 312 312
+163 -45
arch/x86/events/intel/uncore_snbep.c
··· 1396 1396 return ret; 1397 1397 } 1398 1398 1399 + static int topology_gidnid_map(int nodeid, u32 gidnid) 1400 + { 1401 + int i, die_id = -1; 1402 + 1403 + /* 1404 + * every three bits in the Node ID mapping register maps 1405 + * to a particular node. 1406 + */ 1407 + for (i = 0; i < 8; i++) { 1408 + if (nodeid == GIDNIDMAP(gidnid, i)) { 1409 + if (topology_max_die_per_package() > 1) 1410 + die_id = i; 1411 + else 1412 + die_id = topology_phys_to_logical_pkg(i); 1413 + if (die_id < 0) 1414 + die_id = -ENODEV; 1415 + break; 1416 + } 1417 + } 1418 + 1419 + return die_id; 1420 + } 1421 + 1399 1422 /* 1400 1423 * build pci bus to socket mapping 1401 1424 */ ··· 1458 1435 break; 1459 1436 } 1460 1437 1461 - /* 1462 - * every three bits in the Node ID mapping register maps 1463 - * to a particular node. 1464 - */ 1465 - for (i = 0; i < 8; i++) { 1466 - if (nodeid == GIDNIDMAP(config, i)) { 1467 - if (topology_max_die_per_package() > 1) 1468 - die_id = i; 1469 - else 1470 - die_id = topology_phys_to_logical_pkg(i); 1471 - if (die_id < 0) 1472 - die_id = -ENODEV; 1473 - map->pbus_to_dieid[bus] = die_id; 1474 - break; 1475 - } 1476 - } 1438 + map->pbus_to_dieid[bus] = topology_gidnid_map(nodeid, config); 1477 1439 raw_spin_unlock(&pci2phy_map_lock); 1478 1440 } else { 1479 1441 segment = pci_domain_nr(ubox_dev->bus); ··· 5286 5278 5287 5279 /* ICX uncore support */ 5288 5280 5289 - static unsigned icx_cha_msr_offsets[] = { 5281 + static u64 icx_cha_msr_offsets[] = { 5290 5282 0x2a0, 0x2ae, 0x2bc, 0x2ca, 0x2d8, 0x2e6, 0x2f4, 0x302, 0x310, 5291 5283 0x31e, 0x32c, 0x33a, 0x348, 0x356, 0x364, 0x372, 0x380, 0x38e, 5292 5284 0x3aa, 0x3b8, 0x3c6, 0x3d4, 0x3e2, 0x3f0, 0x3fe, 0x40c, 0x41a, ··· 5334 5326 .format_group = &snr_uncore_chabox_format_group, 5335 5327 }; 5336 5328 5337 - static unsigned icx_msr_offsets[] = { 5329 + static u64 icx_msr_offsets[] = { 5338 5330 0x0, 0x20, 0x40, 0x90, 0xb0, 0xd0, 5339 5331 }; 5340 5332 ··· 5604 5596 struct pci_dev *ubox = NULL; 5605 5597 
struct pci_dev *dev = NULL; 5606 5598 u32 nid, gid; 5607 - int i, idx, ret = -EPERM; 5599 + int idx, lgc_pkg, ret = -EPERM; 5608 5600 struct intel_uncore_topology *upi; 5609 5601 unsigned int devfn; 5610 5602 ··· 5619 5611 break; 5620 5612 } 5621 5613 5622 - for (i = 0; i < 8; i++) { 5623 - if (nid != GIDNIDMAP(gid, i)) 5624 - continue; 5625 - for (idx = 0; idx < type->num_boxes; idx++) { 5626 - upi = &type->topology[nid][idx]; 5627 - devfn = PCI_DEVFN(dev_link0 + idx, ICX_UPI_REGS_ADDR_FUNCTION); 5628 - dev = pci_get_domain_bus_and_slot(pci_domain_nr(ubox->bus), 5629 - ubox->bus->number, 5630 - devfn); 5631 - if (dev) { 5632 - ret = upi_fill_topology(dev, upi, idx); 5633 - if (ret) 5634 - goto err; 5635 - } 5614 + lgc_pkg = topology_gidnid_map(nid, gid); 5615 + if (lgc_pkg < 0) { 5616 + ret = -EPERM; 5617 + goto err; 5618 + } 5619 + for (idx = 0; idx < type->num_boxes; idx++) { 5620 + upi = &type->topology[lgc_pkg][idx]; 5621 + devfn = PCI_DEVFN(dev_link0 + idx, ICX_UPI_REGS_ADDR_FUNCTION); 5622 + dev = pci_get_domain_bus_and_slot(pci_domain_nr(ubox->bus), 5623 + ubox->bus->number, 5624 + devfn); 5625 + if (dev) { 5626 + ret = upi_fill_topology(dev, upi, idx); 5627 + if (ret) 5628 + goto err; 5636 5629 } 5637 5630 } 5638 5631 } ··· 6088 6079 { /* end: all zeroes */ }, 6089 6080 }; 6090 6081 6082 + #define SPR_UNCORE_MMIO_COMMON_FORMAT() \ 6083 + SPR_UNCORE_COMMON_FORMAT(), \ 6084 + .ops = &spr_uncore_mmio_ops 6085 + 6091 6086 static struct intel_uncore_type spr_uncore_imc = { 6092 - SPR_UNCORE_COMMON_FORMAT(), 6087 + SPR_UNCORE_MMIO_COMMON_FORMAT(), 6093 6088 .name = "imc", 6094 6089 .fixed_ctr_bits = 48, 6095 6090 .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, 6096 6091 .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, 6097 - .ops = &spr_uncore_mmio_ops, 6098 6092 .event_descs = spr_uncore_imc_events, 6099 6093 }; 6100 6094 ··· 6193 6181 */ 6194 6182 #define SPR_UNCORE_UPI_NUM_BOXES 4 6195 6183 6196 - static unsigned int spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = { 6184 
+ static u64 spr_upi_pci_offsets[SPR_UNCORE_UPI_NUM_BOXES] = { 6197 6185 0, 0x8000, 0x10000, 0x18000 6198 6186 }; 6199 6187 ··· 6424 6412 6425 6413 static struct intel_uncore_type ** 6426 6414 uncore_get_uncores(enum uncore_access_type type_id, int num_extra, 6427 - struct intel_uncore_type **extra) 6415 + struct intel_uncore_type **extra, int max_num_types, 6416 + struct intel_uncore_type **uncores) 6428 6417 { 6429 6418 struct intel_uncore_type **types, **start_types; 6430 6419 int i; ··· 6434 6421 6435 6422 /* Only copy the customized features */ 6436 6423 for (; *types; types++) { 6437 - if ((*types)->type_id >= UNCORE_SPR_NUM_UNCORE_TYPES) 6424 + if ((*types)->type_id >= max_num_types) 6438 6425 continue; 6439 - uncore_type_customized_copy(*types, spr_uncores[(*types)->type_id]); 6426 + uncore_type_customized_copy(*types, uncores[(*types)->type_id]); 6440 6427 } 6441 6428 6442 6429 for (i = 0; i < num_extra; i++, types++) ··· 6483 6470 6484 6471 uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR, 6485 6472 UNCORE_SPR_MSR_EXTRA_UNCORES, 6486 - spr_msr_uncores); 6473 + spr_msr_uncores, 6474 + UNCORE_SPR_NUM_UNCORE_TYPES, 6475 + spr_uncores); 6487 6476 6488 6477 type = uncore_find_type_by_id(uncore_msr_uncores, UNCORE_SPR_CHA); 6489 6478 if (type) { ··· 6567 6552 spr_update_device_location(UNCORE_SPR_M3UPI); 6568 6553 uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 6569 6554 UNCORE_SPR_PCI_EXTRA_UNCORES, 6570 - spr_pci_uncores); 6555 + spr_pci_uncores, 6556 + UNCORE_SPR_NUM_UNCORE_TYPES, 6557 + spr_uncores); 6571 6558 return 0; 6572 6559 } 6573 6560 ··· 6577 6560 { 6578 6561 int ret = snbep_pci2phy_map_init(0x3250, SKX_CPUNODEID, SKX_GIDNIDMAP, true); 6579 6562 6580 - if (ret) 6581 - uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL); 6582 - else { 6563 + if (ret) { 6564 + uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL, 6565 + UNCORE_SPR_NUM_UNCORE_TYPES, 6566 + spr_uncores); 6567 + } else { 6583 6568 
uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 6584 6569 UNCORE_SPR_MMIO_EXTRA_UNCORES, 6585 - spr_mmio_uncores); 6570 + spr_mmio_uncores, 6571 + UNCORE_SPR_NUM_UNCORE_TYPES, 6572 + spr_uncores); 6586 6573 6587 6574 spr_uncore_imc_free_running.num_boxes = uncore_type_max_boxes(uncore_mmio_uncores, UNCORE_SPR_IMC) / 2; 6588 6575 } 6589 6576 } 6590 6577 6591 6578 /* end of SPR uncore support */ 6579 + 6580 + /* GNR uncore support */ 6581 + 6582 + #define UNCORE_GNR_NUM_UNCORE_TYPES 23 6583 + #define UNCORE_GNR_TYPE_15 15 6584 + #define UNCORE_GNR_B2UPI 18 6585 + #define UNCORE_GNR_TYPE_21 21 6586 + #define UNCORE_GNR_TYPE_22 22 6587 + 6588 + int gnr_uncore_units_ignore[] = { 6589 + UNCORE_SPR_UPI, 6590 + UNCORE_GNR_TYPE_15, 6591 + UNCORE_GNR_B2UPI, 6592 + UNCORE_GNR_TYPE_21, 6593 + UNCORE_GNR_TYPE_22, 6594 + UNCORE_IGNORE_END 6595 + }; 6596 + 6597 + static struct intel_uncore_type gnr_uncore_ubox = { 6598 + .name = "ubox", 6599 + .attr_update = uncore_alias_groups, 6600 + }; 6601 + 6602 + static struct intel_uncore_type gnr_uncore_b2cmi = { 6603 + SPR_UNCORE_PCI_COMMON_FORMAT(), 6604 + .name = "b2cmi", 6605 + }; 6606 + 6607 + static struct intel_uncore_type gnr_uncore_b2cxl = { 6608 + SPR_UNCORE_MMIO_COMMON_FORMAT(), 6609 + .name = "b2cxl", 6610 + }; 6611 + 6612 + static struct intel_uncore_type gnr_uncore_mdf_sbo = { 6613 + .name = "mdf_sbo", 6614 + .attr_update = uncore_alias_groups, 6615 + }; 6616 + 6617 + static struct intel_uncore_type *gnr_uncores[UNCORE_GNR_NUM_UNCORE_TYPES] = { 6618 + &spr_uncore_chabox, 6619 + &spr_uncore_iio, 6620 + &spr_uncore_irp, 6621 + NULL, 6622 + &spr_uncore_pcu, 6623 + &gnr_uncore_ubox, 6624 + &spr_uncore_imc, 6625 + NULL, 6626 + NULL, 6627 + NULL, 6628 + NULL, 6629 + NULL, 6630 + NULL, 6631 + NULL, 6632 + NULL, 6633 + NULL, 6634 + &gnr_uncore_b2cmi, 6635 + &gnr_uncore_b2cxl, 6636 + NULL, 6637 + NULL, 6638 + &gnr_uncore_mdf_sbo, 6639 + NULL, 6640 + NULL, 6641 + }; 6642 + 6643 + static struct freerunning_counters 
gnr_iio_freerunning[] = { 6644 + [SPR_IIO_MSR_IOCLK] = { 0x290e, 0x01, 0x10, 1, 48 }, 6645 + [SPR_IIO_MSR_BW_IN] = { 0x360e, 0x10, 0x80, 8, 48 }, 6646 + [SPR_IIO_MSR_BW_OUT] = { 0x2e0e, 0x10, 0x80, 8, 48 }, 6647 + }; 6648 + 6649 + void gnr_uncore_cpu_init(void) 6650 + { 6651 + uncore_msr_uncores = uncore_get_uncores(UNCORE_ACCESS_MSR, 6652 + UNCORE_SPR_MSR_EXTRA_UNCORES, 6653 + spr_msr_uncores, 6654 + UNCORE_GNR_NUM_UNCORE_TYPES, 6655 + gnr_uncores); 6656 + spr_uncore_iio_free_running.num_boxes = uncore_type_max_boxes(uncore_msr_uncores, UNCORE_SPR_IIO); 6657 + spr_uncore_iio_free_running.freerunning = gnr_iio_freerunning; 6658 + } 6659 + 6660 + int gnr_uncore_pci_init(void) 6661 + { 6662 + uncore_pci_uncores = uncore_get_uncores(UNCORE_ACCESS_PCI, 0, NULL, 6663 + UNCORE_GNR_NUM_UNCORE_TYPES, 6664 + gnr_uncores); 6665 + return 0; 6666 + } 6667 + 6668 + void gnr_uncore_mmio_init(void) 6669 + { 6670 + uncore_mmio_uncores = uncore_get_uncores(UNCORE_ACCESS_MMIO, 0, NULL, 6671 + UNCORE_GNR_NUM_UNCORE_TYPES, 6672 + gnr_uncores); 6673 + } 6674 + 6675 + /* end of GNR uncore support */
+12
arch/x86/events/perf_event.h
··· 110 110 return is_metric_event(event) || is_slots_event(event); 111 111 } 112 112 113 + static inline bool is_branch_counters_group(struct perf_event *event) 114 + { 115 + return event->group_leader->hw.flags & PERF_X86_EVENT_BRANCH_COUNTERS; 116 + } 117 + 113 118 struct amd_nb { 114 119 int nb_id; /* NorthBridge id */ 115 120 int refcnt; /* reference count */ ··· 288 283 int lbr_pebs_users; 289 284 struct perf_branch_stack lbr_stack; 290 285 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; 286 + u64 lbr_counters[MAX_LBR_ENTRIES]; /* branch stack extra */ 291 287 union { 292 288 struct er_account *lbr_sel; 293 289 struct er_account *lbr_ctl; ··· 894 888 unsigned int lbr_mispred:1; 895 889 unsigned int lbr_timed_lbr:1; 896 890 unsigned int lbr_br_type:1; 891 + unsigned int lbr_counters:4; 897 892 898 893 void (*lbr_reset)(void); 899 894 void (*lbr_read)(struct cpu_hw_events *cpuc); ··· 1019 1012 #define PMU_FL_INSTR_LATENCY 0x80 /* Support Instruction Latency in PEBS Memory Info Record */ 1020 1013 #define PMU_FL_MEM_LOADS_AUX 0x100 /* Require an auxiliary event for the complete memory info */ 1021 1014 #define PMU_FL_RETIRE_LATENCY 0x200 /* Support Retire Latency in PEBS */ 1015 + #define PMU_FL_BR_CNTR 0x400 /* Support branch counter logging */ 1022 1016 1023 1017 #define EVENT_VAR(_id) event_attr_##_id 1024 1018 #define EVENT_PTR(_id) &event_attr_##_id.attr.attr ··· 1559 1551 void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr); 1560 1552 1561 1553 void intel_ds_init(void); 1554 + 1555 + void intel_pmu_lbr_save_brstack(struct perf_sample_data *data, 1556 + struct cpu_hw_events *cpuc, 1557 + struct perf_event *event); 1562 1558 1563 1559 void intel_pmu_lbr_swap_task_ctx(struct perf_event_pmu_context *prev_epc, 1564 1560 struct perf_event_pmu_context *next_epc);
+2
arch/x86/events/perf_event_flags.h
··· 20 20 PERF_ARCH(PEBS_STLAT, 0x08000) /* st+stlat data address sampling */ 21 21 PERF_ARCH(AMD_BRS, 0x10000) /* AMD Branch Sampling */ 22 22 PERF_ARCH(PEBS_LAT_HYBRID, 0x20000) /* ld and st lat for hybrid */ 23 + PERF_ARCH(NEEDS_BRANCH_STACK, 0x40000) /* require branch stack setup */ 24 + PERF_ARCH(BRANCH_COUNTERS, 0x80000) /* logs the counters in the extra space of each branch */
+5
arch/x86/include/asm/msr-index.h
··· 237 237 #define LBR_INFO_CYCLES 0xffff 238 238 #define LBR_INFO_BR_TYPE_OFFSET 56 239 239 #define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET) 240 + #define LBR_INFO_BR_CNTR_OFFSET 32 241 + #define LBR_INFO_BR_CNTR_NUM 4 242 + #define LBR_INFO_BR_CNTR_BITS 2 243 + #define LBR_INFO_BR_CNTR_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_BITS - 1, 0) 244 + #define LBR_INFO_BR_CNTR_FULL_MASK GENMASK_ULL(LBR_INFO_BR_CNTR_NUM * LBR_INFO_BR_CNTR_BITS - 1, 0) 240 245 241 246 #define MSR_ARCH_LBR_CTL 0x000014ce 242 247 #define ARCH_LBR_CTL_LBREN BIT(0)
+4
arch/x86/include/asm/perf_event.h
··· 31 31 #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) 32 32 #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) 33 33 #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL 34 + #define ARCH_PERFMON_EVENTSEL_BR_CNTR (1ULL << 35) 34 35 35 36 #define INTEL_FIXED_BITS_MASK 0xFULL 36 37 #define INTEL_FIXED_BITS_STRIDE 4 ··· 224 223 unsigned int lbr_timed_lbr:1; 225 224 /* Branch Type Field Supported */ 226 225 unsigned int lbr_br_type:1; 226 + unsigned int reserved:13; 227 + /* Branch counters (Event Logging) Supported */ 228 + unsigned int lbr_counters:4; 227 229 } split; 228 230 unsigned int full; 229 231 };
+1
arch/x86/kernel/smpboot.c
··· 757 757 { 758 758 return cpu_l2c_shared_mask(cpu); 759 759 } 760 + EXPORT_SYMBOL_GPL(cpu_clustergroup_mask); 760 761 761 762 static void impress_friends(void) 762 763 {
+21 -1
include/linux/perf_event.h
··· 1143 1143 return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_PRIV_SAVE; 1144 1144 } 1145 1145 1146 + static inline bool branch_sample_counters(const struct perf_event *event) 1147 + { 1148 + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS; 1149 + } 1150 + 1151 + static inline bool branch_sample_call_stack(const struct perf_event *event) 1152 + { 1153 + return event->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; 1154 + } 1146 1155 1147 1156 struct perf_sample_data { 1148 1157 /* ··· 1186 1177 struct perf_callchain_entry *callchain; 1187 1178 struct perf_raw_record *raw; 1188 1179 struct perf_branch_stack *br_stack; 1180 + u64 *br_stack_cntr; 1189 1181 union perf_sample_weight weight; 1190 1182 union perf_mem_data_src data_src; 1191 1183 u64 txn; ··· 1264 1254 1265 1255 static inline void perf_sample_save_brstack(struct perf_sample_data *data, 1266 1256 struct perf_event *event, 1267 - struct perf_branch_stack *brs) 1257 + struct perf_branch_stack *brs, 1258 + u64 *brs_cntr) 1268 1259 { 1269 1260 int size = sizeof(u64); /* nr */ 1270 1261 ··· 1273 1262 size += sizeof(u64); 1274 1263 size += brs->nr * sizeof(struct perf_branch_entry); 1275 1264 1265 + /* 1266 + * The extension space for counters is appended after the 1267 + * struct perf_branch_stack. It is used to store the occurrences 1268 + * of events of each branch. 1269 + */ 1270 + if (brs_cntr) 1271 + size += brs->nr * sizeof(u64); 1272 + 1276 1273 data->br_stack = brs; 1274 + data->br_stack_cntr = brs_cntr; 1277 1275 data->dyn_size += size; 1278 1276 data->sample_flags |= PERF_SAMPLE_BRANCH_STACK; 1279 1277 }
+13
include/uapi/linux/perf_event.h
··· 204 204 205 205 PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT = 18, /* save privilege mode */ 206 206 207 + PERF_SAMPLE_BRANCH_COUNTERS_SHIFT = 19, /* save occurrences of events on a branch */ 208 + 207 209 PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ 208 210 }; 209 211 ··· 236 234 PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, 237 235 238 236 PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, 237 + 238 + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, 239 239 240 240 PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, 241 241 }; ··· 986 982 * { u64 nr; 987 983 * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX 988 984 * { u64 from, to, flags } lbr[nr]; 985 + * # 986 + * # The format of the counters is decided by the 987 + * # "branch_counter_nr" and "branch_counter_width", 988 + * # which are defined in the ABI. 989 + * # 990 + * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS 989 991 * } && PERF_SAMPLE_BRANCH_STACK 990 992 * 991 993 * { u64 abi; # enum perf_sample_regs_abi ··· 1436 1426 priv:3, /* privilege level */ 1437 1427 reserved:31; 1438 1428 }; 1429 + 1430 + /* Size of used info bits in struct perf_branch_entry */ 1431 + #define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 1439 1432 1440 1433 union perf_sample_weight { 1441 1434 __u64 full;
+34 -12
kernel/events/core.c
··· 7397 7397 if (branch_sample_hw_index(event)) 7398 7398 perf_output_put(handle, data->br_stack->hw_idx); 7399 7399 perf_output_copy(handle, data->br_stack->entries, size); 7400 + /* 7401 + * Add the extension space which is appended 7402 + * right after the struct perf_branch_stack. 7403 + */ 7404 + if (data->br_stack_cntr) { 7405 + size = data->br_stack->nr * sizeof(u64); 7406 + perf_output_copy(handle, data->br_stack_cntr, size); 7407 + } 7400 7408 } else { 7401 7409 /* 7402 7410 * we always store at least the value of nr ··· 11433 11425 static struct attribute *pmu_dev_attrs[] = { 11434 11426 &dev_attr_type.attr, 11435 11427 &dev_attr_perf_event_mux_interval_ms.attr, 11428 + &dev_attr_nr_addr_filters.attr, 11436 11429 NULL, 11437 11430 }; 11438 - ATTRIBUTE_GROUPS(pmu_dev); 11431 + 11432 + static umode_t pmu_dev_is_visible(struct kobject *kobj, struct attribute *a, int n) 11433 + { 11434 + struct device *dev = kobj_to_dev(kobj); 11435 + struct pmu *pmu = dev_get_drvdata(dev); 11436 + 11437 + if (n == 2 && !pmu->nr_addr_filters) 11438 + return 0; 11439 + 11440 + return a->mode; 11441 + } 11442 + 11443 + static struct attribute_group pmu_dev_attr_group = { 11444 + .is_visible = pmu_dev_is_visible, 11445 + .attrs = pmu_dev_attrs, 11446 + }; 11447 + 11448 + static const struct attribute_group *pmu_dev_groups[] = { 11449 + &pmu_dev_attr_group, 11450 + NULL, 11451 + }; 11439 11452 11440 11453 static int pmu_bus_running; 11441 11454 static struct bus_type pmu_bus = { ··· 11493 11464 if (ret) 11494 11465 goto free_dev; 11495 11466 11496 - /* For PMUs with address filters, throw in an extra attribute: */ 11497 - if (pmu->nr_addr_filters) 11498 - ret = device_create_file(pmu->dev, &dev_attr_nr_addr_filters); 11499 - 11500 - if (ret) 11501 - goto del_dev; 11502 - 11503 - if (pmu->attr_update) 11467 + if (pmu->attr_update) { 11504 11468 ret = sysfs_update_groups(&pmu->dev->kobj, pmu->attr_update); 11505 - 11506 - if (ret) 11507 - goto del_dev; 11469 + if (ret) 11470 + 
goto del_dev; 11471 + } 11508 11472 11509 11473 out: 11510 11474 return ret;