Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Ingo Molnar:
"Misc fixes plus a small hw-enablement patch for Intel IB model 58
uncore events"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/intel/lbr: Demand proper privileges for PERF_SAMPLE_BRANCH_KERNEL
perf/x86/intel/lbr: Fix LBR filter
perf/x86: Blacklist all MEM_*_RETIRED events for Ivy Bridge
perf: Fix vmalloc ring buffer pages handling
perf/x86/intel: Fix unintended variable name reuse
perf/x86/intel: Add support for IvyBridge model 58 Uncore
perf/x86/intel: Fix typo in perf_event_intel_uncore.c
x86: Eliminate irq_mis_count counted in arch_irq_stat

+51 -26
+9 -4
arch/x86/kernel/cpu/perf_event_intel.c
··· 128 128 INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */ 129 129 INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */ 130 130 INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ 131 - INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */ 132 - INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ 133 - INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 134 - INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ 131 + /* 132 + * Errata BV98 -- MEM_*_RETIRED events can leak between counters of SMT 133 + * siblings; disable these events because they can corrupt unrelated 134 + * counters. 135 + */ 136 + INTEL_EVENT_CONSTRAINT(0xd0, 0x0), /* MEM_UOPS_RETIRED.* */ 137 + INTEL_EVENT_CONSTRAINT(0xd1, 0x0), /* MEM_LOAD_UOPS_RETIRED.* */ 138 + INTEL_EVENT_CONSTRAINT(0xd2, 0x0), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ 139 + INTEL_EVENT_CONSTRAINT(0xd3, 0x0), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ 135 140 EVENT_CONSTRAINT_END 136 141 }; 137 142
+22 -5
arch/x86/kernel/cpu/perf_event_intel_lbr.c
··· 310 310 * - in case there is no HW filter 311 311 * - in case the HW filter has errata or limitations 312 312 */ 313 - static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event) 313 + static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event) 314 314 { 315 315 u64 br_type = event->attr.branch_sample_type; 316 316 int mask = 0; ··· 318 318 if (br_type & PERF_SAMPLE_BRANCH_USER) 319 319 mask |= X86_BR_USER; 320 320 321 - if (br_type & PERF_SAMPLE_BRANCH_KERNEL) 321 + if (br_type & PERF_SAMPLE_BRANCH_KERNEL) { 322 + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) 323 + return -EACCES; 322 324 mask |= X86_BR_KERNEL; 325 + } 323 326 324 327 /* we ignore BRANCH_HV here */ 325 328 ··· 342 339 * be used by fixup code for some CPU 343 340 */ 344 341 event->hw.branch_reg.reg = mask; 342 + 343 + return 0; 345 344 } 346 345 347 346 /* ··· 391 386 /* 392 387 * setup SW LBR filter 393 388 */ 394 - intel_pmu_setup_sw_lbr_filter(event); 389 + ret = intel_pmu_setup_sw_lbr_filter(event); 390 + if (ret) 391 + return ret; 395 392 396 393 /* 397 394 * setup HW LBR filter, if any ··· 449 442 return X86_BR_NONE; 450 443 451 444 addr = buf; 452 - } else 453 - addr = (void *)from; 445 + } else { 446 + /* 447 + * The LBR logs any address in the IP, even if the IP just 448 + * faulted. This means userspace can control the from address. 449 + * Ensure we don't blindy read any address by validating it is 450 + * a known text address. 451 + */ 452 + if (kernel_text_address(from)) 453 + addr = (void *)from; 454 + else 455 + return X86_BR_NONE; 456 + } 454 457 455 458 /* 456 459 * decoder needs to know the ABI especially
+10 -9
arch/x86/kernel/cpu/perf_event_intel_uncore.c
··· 3093 3093 static int __init uncore_type_init(struct intel_uncore_type *type) 3094 3094 { 3095 3095 struct intel_uncore_pmu *pmus; 3096 - struct attribute_group *events_group; 3096 + struct attribute_group *attr_group; 3097 3097 struct attribute **attrs; 3098 3098 int i, j; 3099 3099 ··· 3120 3120 while (type->event_descs[i].attr.attr.name) 3121 3121 i++; 3122 3122 3123 - events_group = kzalloc(sizeof(struct attribute *) * (i + 1) + 3124 - sizeof(*events_group), GFP_KERNEL); 3125 - if (!events_group) 3123 + attr_group = kzalloc(sizeof(struct attribute *) * (i + 1) + 3124 + sizeof(*attr_group), GFP_KERNEL); 3125 + if (!attr_group) 3126 3126 goto fail; 3127 3127 3128 - attrs = (struct attribute **)(events_group + 1); 3129 - events_group->name = "events"; 3130 - events_group->attrs = attrs; 3128 + attrs = (struct attribute **)(attr_group + 1); 3129 + attr_group->name = "events"; 3130 + attr_group->attrs = attrs; 3131 3131 3132 3132 for (j = 0; j < i; j++) 3133 3133 attrs[j] = &type->event_descs[j].attr.attr; 3134 3134 3135 - type->events_group = events_group; 3135 + type->events_group = attr_group; 3136 3136 } 3137 3137 3138 3138 type->pmu_group = &uncore_pmu_attr_group; ··· 3545 3545 msr_uncores = nhm_msr_uncores; 3546 3546 break; 3547 3547 case 42: /* Sandy Bridge */ 3548 + case 58: /* Ivy Bridge */ 3548 3549 if (snb_uncore_cbox.num_boxes > max_cores) 3549 3550 snb_uncore_cbox.num_boxes = max_cores; 3550 3551 msr_uncores = snb_msr_uncores; 3551 3552 break; 3552 - case 45: /* Sandy Birdge-EP */ 3553 + case 45: /* Sandy Bridge-EP */ 3553 3554 if (snbep_uncore_cbox.num_boxes > max_cores) 3554 3555 snbep_uncore_cbox.num_boxes = max_cores; 3555 3556 msr_uncores = snbep_msr_uncores;
-4
arch/x86/kernel/irq.c
··· 165 165 u64 arch_irq_stat(void) 166 166 { 167 167 u64 sum = atomic_read(&irq_err_count); 168 - 169 - #ifdef CONFIG_X86_IO_APIC 170 - sum += atomic_read(&irq_mis_count); 171 - #endif 172 168 return sum; 173 169 } 174 170
+10 -4
kernel/events/ring_buffer.c
··· 326 326 } 327 327 328 328 #else 329 + static int data_page_nr(struct ring_buffer *rb) 330 + { 331 + return rb->nr_pages << page_order(rb); 332 + } 329 333 330 334 struct page * 331 335 perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) 332 336 { 333 - if (pgoff > (1UL << page_order(rb))) 337 + /* The '>' counts in the user page. */ 338 + if (pgoff > data_page_nr(rb)) 334 339 return NULL; 335 340 336 341 return vmalloc_to_page((void *)rb->user_page + pgoff * PAGE_SIZE); ··· 355 350 int i, nr; 356 351 357 352 rb = container_of(work, struct ring_buffer, work); 358 - nr = 1 << page_order(rb); 353 + nr = data_page_nr(rb); 359 354 360 355 base = rb->user_page; 361 - for (i = 0; i < nr + 1; i++) 356 + /* The '<=' counts in the user page. */ 357 + for (i = 0; i <= nr; i++) 362 358 perf_mmap_unmark_page(base + (i * PAGE_SIZE)); 363 359 364 360 vfree(base); ··· 393 387 rb->user_page = all_buf; 394 388 rb->data_pages[0] = all_buf + PAGE_SIZE; 395 389 rb->page_order = ilog2(nr_pages); 396 - rb->nr_pages = 1; 390 + rb->nr_pages = !!nr_pages; 397 391 398 392 ring_buffer_init(rb, watermark, flags); 399 393