Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

tracing/user_events: Split up mm alloc and attach

When a new mm is being created in a fork() path, it is currently
allocated and then attached in one go. This leaves the mm exposed to
the tracing register callbacks while any parent enabler locations are
copied in. This should not happen.

Split up mm alloc and attach as unique operations. When duplicating
enablers, first alloc, then duplicate, and only upon success, attach.
This prevents any timing window outside of the event_reg mutex for
enablement walking. This allows for dropping RCU requirement for
enablement walking in later patches.

Link: https://lkml.kernel.org/r/20230519230741.669-2-beaub@linux.microsoft.com
Link: https://lore.kernel.org/linux-trace-kernel/CAHk-=whTBvXJuoi_kACo3qi5WZUmRrhyA-_=rRFsycTytmB6qw@mail.gmail.com/

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
[ change log written by Beau Belgrave ]
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>

Authored by Linus Torvalds and committed by Steven Rostedt (Google)
3e0fea09 632478a0

+18 -11
kernel/trace/trace_events_user.c
··· 539 539 return found; 540 540 } 541 541 542 - static struct user_event_mm *user_event_mm_create(struct task_struct *t) 542 + static struct user_event_mm *user_event_mm_alloc(struct task_struct *t) 543 543 { 544 544 struct user_event_mm *user_mm; 545 - unsigned long flags; 546 545 547 546 user_mm = kzalloc(sizeof(*user_mm), GFP_KERNEL_ACCOUNT); 548 547 ··· 552 553 INIT_LIST_HEAD(&user_mm->enablers); 553 554 refcount_set(&user_mm->refcnt, 1); 554 555 refcount_set(&user_mm->tasks, 1); 555 - 556 - spin_lock_irqsave(&user_event_mms_lock, flags); 557 - list_add_rcu(&user_mm->link, &user_event_mms); 558 - spin_unlock_irqrestore(&user_event_mms_lock, flags); 559 - 560 - t->user_event_mm = user_mm; 561 556 562 557 /* 563 558 * The lifetime of the memory descriptor can slightly outlast ··· 566 573 return user_mm; 567 574 } 568 575 576 + static void user_event_mm_attach(struct user_event_mm *user_mm, struct task_struct *t) 577 + { 578 + unsigned long flags; 579 + 580 + spin_lock_irqsave(&user_event_mms_lock, flags); 581 + list_add_rcu(&user_mm->link, &user_event_mms); 582 + spin_unlock_irqrestore(&user_event_mms_lock, flags); 583 + 584 + t->user_event_mm = user_mm; 585 + } 586 + 569 587 static struct user_event_mm *current_user_event_mm(void) 570 588 { 571 589 struct user_event_mm *user_mm = current->user_event_mm; ··· 584 580 if (user_mm) 585 581 goto inc; 586 582 587 - user_mm = user_event_mm_create(current); 583 + user_mm = user_event_mm_alloc(current); 588 584 589 585 if (!user_mm) 590 586 goto error; 587 + 588 + user_event_mm_attach(user_mm, current); 591 589 inc: 592 590 refcount_inc(&user_mm->refcnt); 593 591 error: ··· 677 671 678 672 void user_event_mm_dup(struct task_struct *t, struct user_event_mm *old_mm) 679 673 { 680 - struct user_event_mm *mm = user_event_mm_alloc(t); 674 + struct user_event_mm *mm = user_event_mm_alloc(t); 681 675 struct user_event_enabler *enabler; 682 676 683 677 if (!mm) ··· 691 685 692 686 rcu_read_unlock(); 693 687 688 + user_event_mm_attach(mm, t); 694 689 return; 695 690 error: 696 691 rcu_read_unlock(); 697 - user_event_mm_remove(t); 692 + user_event_mm_destroy(mm); 698 693 } 699 694 700 695 static bool current_user_event_enabler_exists(unsigned long uaddr,