Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Thomas Gleixner:
"Four patches which all address lock inversions and deadlocks in the
perf core code and the Intel debug store"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86: Fix perf,x86,cpuhp deadlock
perf/core: Fix ctx::mutex deadlock
perf/core: Fix another perf,trace,cpuhp lock inversion
perf/core: Fix lock inversion between perf,trace,cpuhp
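
All four commits untangle the same class of bug: two code paths taking the same pair of locks in opposite orders. As a rough, self-contained illustration of that AB-BA pattern, here is a minimal userspace C sketch; the lock names are hypothetical stand-ins, not the actual kernel locks or call chains:

/*
 * Minimal AB-BA deadlock illustration (hypothetical locks, not the kernel's).
 * Build: gcc -pthread abba.c -o abba
 */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER; /* e.g. a hotplug-style lock */
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER; /* e.g. a context-style mutex */

static void *path_one(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&lock_a);    /* A ... */
    usleep(1000);                   /* widen the race window */
    pthread_mutex_lock(&lock_b);    /* ... then B */
    puts("path one got A then B");
    pthread_mutex_unlock(&lock_b);
    pthread_mutex_unlock(&lock_a);
    return NULL;
}

static void *path_two(void *arg)
{
    (void)arg;
    pthread_mutex_lock(&lock_b);    /* B ... */
    usleep(1000);
    pthread_mutex_lock(&lock_a);    /* ... then A: inverted order, may deadlock */
    puts("path two got B then A");
    pthread_mutex_unlock(&lock_a);
    pthread_mutex_unlock(&lock_b);
    return NULL;
}

int main(void)
{
    pthread_t t1, t2;

    pthread_create(&t1, NULL, path_one, NULL);
    pthread_create(&t2, NULL, path_two, NULL);
    pthread_join(t1, NULL);         /* with the sleeps above, this usually hangs */
    pthread_join(t2, NULL);
    return 0;
}

In the kernel, lockdep reports exactly this kind of ordering violation; the patches below break the cycles rather than relying on timing.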

Changed files (+60 -20):
arch/x86/events/intel/ds.c (+18 -15)
···
 static void release_pebs_buffer(int cpu)
 {
     struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
-    struct debug_store *ds = hwev->ds;
     void *cea;
 
-    if (!ds || !x86_pmu.pebs)
+    if (!x86_pmu.pebs)
         return;
 
     kfree(per_cpu(insn_buffer, cpu));
···
     /* Clear the fixmap */
     cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
     ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
-    ds->pebs_buffer_base = 0;
     dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
     hwev->ds_pebs_vaddr = NULL;
 }
···
 static void release_bts_buffer(int cpu)
 {
     struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
-    struct debug_store *ds = hwev->ds;
     void *cea;
 
-    if (!ds || !x86_pmu.bts)
+    if (!x86_pmu.bts)
         return;
 
     /* Clear the fixmap */
     cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
     ds_clear_cea(cea, BTS_BUFFER_SIZE);
-    ds->bts_buffer_base = 0;
     dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
     hwev->ds_bts_vaddr = NULL;
 }
···
     if (!x86_pmu.bts && !x86_pmu.pebs)
         return;
 
-    get_online_cpus();
-    for_each_online_cpu(cpu)
+    for_each_possible_cpu(cpu)
+        release_ds_buffer(cpu);
+
+    for_each_possible_cpu(cpu) {
+        /*
+         * Again, ignore errors from offline CPUs, they will no longer
+         * observe cpu_hw_events.ds and not program the DS_AREA when
+         * they come up.
+         */
         fini_debug_store_on_cpu(cpu);
+    }
 
     for_each_possible_cpu(cpu) {
         release_pebs_buffer(cpu);
         release_bts_buffer(cpu);
-        release_ds_buffer(cpu);
     }
-    put_online_cpus();
 }
 
 void reserve_ds_buffers(void)
···
 
     if (!x86_pmu.pebs)
         pebs_err = 1;
-
-    get_online_cpus();
 
     for_each_possible_cpu(cpu) {
         if (alloc_ds_buffer(cpu)) {
···
         if (x86_pmu.pebs && !pebs_err)
             x86_pmu.pebs_active = 1;
 
-        for_each_online_cpu(cpu)
+        for_each_possible_cpu(cpu) {
+            /*
+             * Ignores wrmsr_on_cpu() errors for offline CPUs they
+             * will get this call through intel_pmu_cpu_starting().
+             */
             init_debug_store_on_cpu(cpu);
+        }
     }
-
-    put_online_cpus();
 }
 
 /*
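
The net effect of the ds.c hunks: buffer setup and teardown now walk every possible CPU without taking the hotplug lock, and a CPU that is offline at that point simply programs its DS area later from its own online callback (intel_pmu_cpu_starting(), per the new comments). Below is a rough userspace sketch of that "program everyone now, let late starters program themselves" idea; the names (worker_main(), publish_config(), shared_cfg) are made up for illustration and stand in loosely for the hotplug callback and the per-CPU programming:

/*
 * Sketch: push state to whoever is online, skip the rest; late starters
 * pick the state up in their own startup path. Hypothetical analogue only.
 * Build: gcc -pthread late_start.c -o late_start
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define NR_POSSIBLE 4

static _Atomic int shared_cfg;              /* the value everyone must observe */
static atomic_bool online[NR_POSSIBLE];

/* Analogue of a per-CPU online callback: a worker programs itself on start. */
static void *worker_main(void *arg)
{
    int id = (int)(long)arg;

    atomic_store(&online[id], true);
    printf("worker %d starts with cfg %d\n", id, atomic_load(&shared_cfg));
    return NULL;
}

/* Analogue of walking all possible slots without a hotplug-style lock. */
static void publish_config(int cfg)
{
    atomic_store(&shared_cfg, cfg);
    for (int id = 0; id < NR_POSSIBLE; id++) {
        if (!atomic_load(&online[id]))
            continue;                       /* offline slots are simply skipped */
        printf("pushed cfg %d to online worker %d\n", cfg, id);
    }
}

int main(void)
{
    pthread_t t[NR_POSSIBLE];

    publish_config(42);                     /* nobody online yet: every slot skipped */

    for (int id = 0; id < NR_POSSIBLE; id++)
        pthread_create(&t[id], NULL, worker_main, (void *)(long)id);

    usleep(10000);                          /* let some workers come online */
    publish_config(43);                     /* online workers get pushed the new value */

    for (int id = 0; id < NR_POSSIBLE; id++)
        pthread_join(t[id], NULL);
    return 0;
}
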
kernel/events/core.c (+42 -5)
···
  * perf_event_context::lock
  * perf_event::mmap_mutex
  * mmap_sem
+ *
+ *    cpu_hotplug_lock
+ *      pmus_lock
+ *        cpuctx->mutex / perf_event_context::mutex
  */
 static struct perf_event_context *
 perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
···
 {
     struct perf_event_context *ctx = event->ctx;
     struct perf_event *child, *tmp;
+    LIST_HEAD(free_list);
 
     /*
      * If we got here through err_file: fput(event_file); we will not have
···
                        struct perf_event, child_list);
         if (tmp == child) {
             perf_remove_from_context(child, DETACH_GROUP);
-            list_del(&child->child_list);
-            free_event(child);
+            list_move(&child->child_list, &free_list);
             /*
              * This matches the refcount bump in inherit_event();
              * this can't be the last reference.
···
         goto again;
     }
     mutex_unlock(&event->child_mutex);
+
+    list_for_each_entry_safe(child, tmp, &free_list, child_list) {
+        list_del(&child->child_list);
+        free_event(child);
+    }
 
 no_ctx:
     put_event(event); /* Must be the 'last' reference */
···
     return ret;
 }
 
+static int
+perf_tracepoint_set_filter(struct perf_event *event, char *filter_str)
+{
+    struct perf_event_context *ctx = event->ctx;
+    int ret;
+
+    /*
+     * Beware, here be dragons!!
+     *
+     * the tracepoint muck will deadlock against ctx->mutex, but the tracepoint
+     * stuff does not actually need it. So temporarily drop ctx->mutex. As per
+     * perf_event_ctx_lock() we already have a reference on ctx.
+     *
+     * This can result in event getting moved to a different ctx, but that
+     * does not affect the tracepoint state.
+     */
+    mutex_unlock(&ctx->mutex);
+    ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
+    mutex_lock(&ctx->mutex);
+
+    return ret;
+}
+
 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
 {
     char *filter_str;
···
 
     if (IS_ENABLED(CONFIG_EVENT_TRACING) &&
         event->attr.type == PERF_TYPE_TRACEPOINT)
-        ret = ftrace_profile_set_filter(event, event->attr.config,
-                                        filter_str);
+        ret = perf_tracepoint_set_filter(event, filter_str);
     else if (has_addr_filter(event))
         ret = perf_event_set_addr_filter(event, filter_str);
 
···
     if (!try_module_get(pmu->module))
         return -ENODEV;
 
-    if (event->group_leader != event) {
+    /*
+     * A number of pmu->event_init() methods iterate the sibling_list to,
+     * for example, validate if the group fits on the PMU. Therefore,
+     * if this is a sibling event, acquire the ctx->mutex to protect
+     * the sibling_list.
+     */
+    if (event->group_leader != event && pmu->task_ctx_nr != perf_sw_context) {
         /*
          * This ctx->mutex can nest when we're called through
          * inheritance. See the perf_event_ctx_lock_nested() comment.
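
Two patterns are visible in these core.c hunks. perf_tracepoint_set_filter() drops ctx->mutex around the ftrace call and re-takes it afterwards, relying on the reference already held on the context (per the new comment); and the child-event teardown now collects children on a local free_list under event->child_mutex and only calls free_event() after that mutex has been released. A minimal sketch of the latter "unlink under the lock, free outside it" idiom follows; it uses a hypothetical node type and plain malloc/free as an analogy, not the perf data structures:

/*
 * "Unlink under the lock, free outside it" - sketch of the free_list idiom.
 * Build: gcc -pthread deferred_free.c -o deferred_free
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
    int val;
    struct node *next;
};

static struct node *head;
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void add(int val)
{
    struct node *n = malloc(sizeof(*n));

    n->val = val;
    pthread_mutex_lock(&list_lock);
    n->next = head;
    head = n;
    pthread_mutex_unlock(&list_lock);
}

/*
 * Remove all matching nodes. The destructor (here just free()) runs only
 * after list_lock is dropped, so a destructor that takes other locks can
 * never nest inside list_lock and create an ordering problem.
 */
static void remove_matching(int val)
{
    struct node **pp, *n, *free_list = NULL;

    pthread_mutex_lock(&list_lock);
    for (pp = &head; (n = *pp) != NULL; ) {
        if (n->val == val) {
            *pp = n->next;          /* unlink under the lock */
            n->next = free_list;    /* move onto the local free list */
            free_list = n;
        } else {
            pp = &n->next;
        }
    }
    pthread_mutex_unlock(&list_lock);

    while (free_list) {             /* free with no locks held */
        n = free_list;
        free_list = n->next;
        printf("freeing %d\n", n->val);
        free(n);
    }
}

int main(void)
{
    add(1); add(2); add(1); add(3);
    remove_matching(1);
    return 0;
}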