Merge tag 'sched-urgent-2025-02-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull rseq fixes from Ingo Molnar:

 - Fix overly spread-out RSEQ concurrency ID allocation pattern that
   regressed certain workloads

 - Fix RSEQ registration syscall behavior on -EFAULT errors when
   CONFIG_DEBUG_RSEQ=y (This debug option is disabled on most
   distributions)

* tag 'sched-urgent-2025-02-22' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
rseq: Fix rseq registration with CONFIG_DEBUG_RSEQ
sched: Compact RSEQ concurrency IDs with reduced threads and affinity

Changed files (+34 -9):

include/linux/mm_types.h (+4 -3)
···
 	 */
 	unsigned int nr_cpus_allowed;
 	/**
-	 * @max_nr_cid: Maximum number of concurrency IDs allocated.
+	 * @max_nr_cid: Maximum number of allowed concurrency
+	 * IDs allocated.
 	 *
-	 * Track the highest number of concurrency IDs allocated for the
-	 * mm.
+	 * Track the highest number of allowed concurrency IDs
+	 * allocated for the mm.
 	 */
 	atomic_t max_nr_cid;
 	/**

kernel/rseq.c (+8 -3)
···
 		return -EINVAL;
 	if (!access_ok(rseq, rseq_len))
 		return -EFAULT;
-	current->rseq = rseq;
-	current->rseq_len = rseq_len;
-	current->rseq_sig = sig;
 #ifdef CONFIG_DEBUG_RSEQ
 	/*
 	 * Initialize the in-kernel rseq fields copy for validation of
···
 	    get_user(rseq_kernel_fields(current)->mm_cid, &rseq->mm_cid))
 		return -EFAULT;
 #endif
+	/*
+	 * Activate the registration by setting the rseq area address, length
+	 * and signature in the task struct.
+	 */
+	current->rseq = rseq;
+	current->rseq_len = rseq_len;
+	current->rseq_sig = sig;
+
 	/*
 	 * If rseq was previously inactive, and has just been
 	 * registered, ensure the cpu_id_start and cpu_id fields
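
The hunk above only activates the registration once the CONFIG_DEBUG_RSEQ validation has
succeeded, so an -EFAULT return now leaves the task unregistered instead of half-registered.
For illustration only, here is a minimal userspace sketch of registering an rseq area directly
with the rseq() syscall, assuming kernel headers that provide <linux/rseq.h> and __NR_rseq;
the rseq_area variable, register_rseq() helper and MY_RSEQ_SIG value are invented for this
example, and a recent glibc already registers rseq itself, in which case the direct call
fails with EBUSY:

/* Minimal sketch, not part of the patch: direct rseq registration from userspace. */
#include <linux/rseq.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

#define MY_RSEQ_SIG	0x53053053	/* example signature; any value used consistently works */

/* Per-thread rseq area; the name is illustrative. */
static __thread struct rseq rseq_area __attribute__((aligned(32)));

static int register_rseq(uint32_t sig)
{
	memset(&rseq_area, 0, sizeof(rseq_area));
	rseq_area.cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
	/* rseq(rseq, rseq_len, flags, sig) */
	return syscall(__NR_rseq, &rseq_area, sizeof(rseq_area), 0, sig);
}

int main(void)
{
	if (register_rseq(MY_RSEQ_SIG)) {
		/*
		 * With the ordering fixed above, an -EFAULT from a failed
		 * CONFIG_DEBUG_RSEQ validation no longer leaves a partially
		 * activated registration behind in the task struct.
		 */
		perror("rseq registration");
		return 1;
	}
	printf("rseq registered, cpu_id=%d\n", (int)rseq_area.cpu_id);
	return 0;
}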

kernel/sched/sched.h (+22 -3)
···
 {
 	struct cpumask *cidmask = mm_cidmask(mm);
 	struct mm_cid __percpu *pcpu_cid = mm->pcpu_cid;
-	int cid = __this_cpu_read(pcpu_cid->recent_cid);
+	int cid, max_nr_cid, allowed_max_nr_cid;
 
+	/*
+	 * After shrinking the number of threads or reducing the number
+	 * of allowed cpus, reduce the value of max_nr_cid so expansion
+	 * of cid allocation will preserve cache locality if the number
+	 * of threads or allowed cpus increase again.
+	 */
+	max_nr_cid = atomic_read(&mm->max_nr_cid);
+	while ((allowed_max_nr_cid = min_t(int, READ_ONCE(mm->nr_cpus_allowed),
+					   atomic_read(&mm->mm_users))),
+	       max_nr_cid > allowed_max_nr_cid) {
+		/* atomic_try_cmpxchg loads previous mm->max_nr_cid into max_nr_cid. */
+		if (atomic_try_cmpxchg(&mm->max_nr_cid, &max_nr_cid, allowed_max_nr_cid)) {
+			max_nr_cid = allowed_max_nr_cid;
+			break;
+		}
+	}
 	/* Try to re-use recent cid. This improves cache locality. */
-	if (!mm_cid_is_unset(cid) && !cpumask_test_and_set_cpu(cid, cidmask))
+	cid = __this_cpu_read(pcpu_cid->recent_cid);
+	if (!mm_cid_is_unset(cid) && cid < max_nr_cid &&
+	    !cpumask_test_and_set_cpu(cid, cidmask))
 		return cid;
 	/*
 	 * Expand cid allocation if the maximum number of concurrency
···
 	 * and number of threads. Expanding cid allocation as much as
 	 * possible improves cache locality.
 	 */
-	cid = atomic_read(&mm->max_nr_cid);
+	cid = max_nr_cid;
 	while (cid < READ_ONCE(mm->nr_cpus_allowed) && cid < atomic_read(&mm->mm_users)) {
+		/* atomic_try_cmpxchg loads previous mm->max_nr_cid into cid. */
 		if (!atomic_try_cmpxchg(&mm->max_nr_cid, &cid, cid + 1))
 			continue;
 		if (!cpumask_test_and_set_cpu(cid, cidmask))
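
To make the effect of the clamping concrete, here is a small userspace model (purely
illustrative, not kernel code) of the allowed_max_nr_cid logic added in the hunk above;
the min_int() and clamp_max_nr_cid() helpers are made up for the example. Once the thread
count or the allowed-CPU count drops, max_nr_cid is pulled back down to
min(nr_cpus_allowed, mm_users), so concurrency IDs handed out afterwards stay in a
compact, cache-friendly range:

/* Illustrative model only; variable names mirror the kernel fields. */
#include <stdio.h>

static int max_nr_cid;	/* stands in for the atomic mm->max_nr_cid */

static int min_int(int a, int b)
{
	return a < b ? a : b;
}

/* Mirrors the clamp added in the hunk above (the kernel uses atomic_try_cmpxchg()). */
static int clamp_max_nr_cid(int nr_cpus_allowed, int mm_users)
{
	int allowed_max_nr_cid = min_int(nr_cpus_allowed, mm_users);

	if (max_nr_cid > allowed_max_nr_cid)
		max_nr_cid = allowed_max_nr_cid;
	return max_nr_cid;
}

int main(void)
{
	max_nr_cid = 64;	/* e.g. 64 threads previously ran on 64 allowed CPUs */
	printf("shrunk to 8 threads:    max_nr_cid = %d\n", clamp_max_nr_cid(64, 8));
	printf("affinity cut to 4 CPUs: max_nr_cid = %d\n", clamp_max_nr_cid(4, 8));
	return 0;
}

Running this prints 8 and then 4: the tracked maximum follows the smaller of the two limits
instead of staying stuck at its historical peak, which is what keeps cid allocation compact.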