Merge tag 'sched_ext-for-6.18-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:
"Five fixes addressing PREEMPT_RT compatibility and locking issues.

Three commits fix potential deadlocks and sleeps in atomic contexts on
RT kernels by converting locks to raw spinlocks and ensuring IRQ work
runs in hard-irq context. The remaining two fix unsafe locking in the
debug dump path and a variable dereference typo"
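
Background for the RT-related fixes in this pull (not part of the commit text): on PREEMPT_RT, a plain spinlock_t becomes a sleeping lock and irq_work callbacks are deferred to task context by default, so a lock that is taken from hard-irq or scheduler context must be a raw_spinlock_t, and IRQ work that has to run in hard-irq context must be initialized with IRQ_WORK_INIT_HARD(). A minimal sketch of that pattern follows; demo_lock, demo_list, demo_irq_workfn and demo_add are hypothetical names, not identifiers from the sched_ext code.

#include <linux/spinlock.h>
#include <linux/irq_work.h>
#include <linux/list.h>

static DEFINE_RAW_SPINLOCK(demo_lock);     /* stays a spinning lock on RT */
static LIST_HEAD(demo_list);

static void demo_irq_workfn(struct irq_work *work)
{
        /* Runs in hard-irq context even on RT because of IRQ_WORK_INIT_HARD. */
        raw_spin_lock(&demo_lock);
        /* ... walk or modify demo_list ... */
        raw_spin_unlock(&demo_lock);
}

static struct irq_work demo_work = IRQ_WORK_INIT_HARD(demo_irq_workfn);

static void demo_add(struct list_head *node)
{
        unsigned long flags;

        /* Safe from any context: disables IRQs and never sleeps, even on RT. */
        raw_spin_lock_irqsave(&demo_lock, flags);
        list_add_tail(node, &demo_list);
        raw_spin_unlock_irqrestore(&demo_lock, flags);

        irq_work_queue(&demo_work);
}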

* tag 'sched_ext-for-6.18-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
sched_ext: Use IRQ_WORK_INIT_HARD() to initialize rq->scx.kick_cpus_irq_work
sched_ext: Fix possible deadlock in the deferred_irq_workfn()
sched/ext: convert scx_tasks_lock to raw spinlock
sched_ext: Fix unsafe locking in the scx_dump_state()
sched_ext: Fix use of uninitialized variable in scx_bpf_cpuperf_set()

 kernel/sched/ext.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -25,7 +25,7 @@
  * guarantee system safety. Maintain a dedicated task list which contains every
  * task between its fork and eventual free.
  */
-static DEFINE_SPINLOCK(scx_tasks_lock);
+static DEFINE_RAW_SPINLOCK(scx_tasks_lock);
 static LIST_HEAD(scx_tasks);
 
 /* ops enable/disable */
@@ -476,7 +476,7 @@
 	BUILD_BUG_ON(__SCX_DSQ_ITER_ALL_FLAGS &
 		     ((1U << __SCX_DSQ_LNODE_PRIV_SHIFT) - 1));
 
-	spin_lock_irq(&scx_tasks_lock);
+	raw_spin_lock_irq(&scx_tasks_lock);
 
 	iter->cursor = (struct sched_ext_entity){ .flags = SCX_TASK_CURSOR };
 	list_add(&iter->cursor.tasks_node, &scx_tasks);
@@ -507,14 +507,14 @@
 	__scx_task_iter_rq_unlock(iter);
 	if (iter->list_locked) {
 		iter->list_locked = false;
-		spin_unlock_irq(&scx_tasks_lock);
+		raw_spin_unlock_irq(&scx_tasks_lock);
 	}
 }
 
 static void __scx_task_iter_maybe_relock(struct scx_task_iter *iter)
 {
 	if (!iter->list_locked) {
-		spin_lock_irq(&scx_tasks_lock);
+		raw_spin_lock_irq(&scx_tasks_lock);
 		iter->list_locked = true;
 	}
 }
@@ -2940,9 +2940,9 @@
 		}
 	}
 
-	spin_lock_irq(&scx_tasks_lock);
+	raw_spin_lock_irq(&scx_tasks_lock);
 	list_add_tail(&p->scx.tasks_node, &scx_tasks);
-	spin_unlock_irq(&scx_tasks_lock);
+	raw_spin_unlock_irq(&scx_tasks_lock);
 
 	percpu_up_read(&scx_fork_rwsem);
 }
@@ -2966,9 +2966,9 @@
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&scx_tasks_lock, flags);
+	raw_spin_lock_irqsave(&scx_tasks_lock, flags);
 	list_del_init(&p->scx.tasks_node);
-	spin_unlock_irqrestore(&scx_tasks_lock, flags);
+	raw_spin_unlock_irqrestore(&scx_tasks_lock, flags);
 
 	/*
 	 * @p is off scx_tasks and wholly ours. scx_enable()'s READY -> ENABLED
@@ -4276,7 +4276,7 @@
 		size_t avail, used;
 		bool idle;
 
-		rq_lock(rq, &rf);
+		rq_lock_irqsave(rq, &rf);
 
 		idle = list_empty(&rq->scx.runnable_list) &&
 		       rq->curr->sched_class == &idle_sched_class;
@@ -4345,7 +4345,7 @@
 		list_for_each_entry(p, &rq->scx.runnable_list, scx.runnable_node)
 			scx_dump_task(&s, &dctx, p, ' ');
 next:
-		rq_unlock(rq, &rf);
+		rq_unlock_irqrestore(rq, &rf);
 	}
 
 	dump_newline(&s);
@@ -5321,8 +5321,8 @@
 		BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_kick_if_idle, GFP_KERNEL, n));
 		BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_preempt, GFP_KERNEL, n));
 		BUG_ON(!zalloc_cpumask_var_node(&rq->scx.cpus_to_wait, GFP_KERNEL, n));
-		init_irq_work(&rq->scx.deferred_irq_work, deferred_irq_workfn);
-		init_irq_work(&rq->scx.kick_cpus_irq_work, kick_cpus_irq_workfn);
+		rq->scx.deferred_irq_work = IRQ_WORK_INIT_HARD(deferred_irq_workfn);
+		rq->scx.kick_cpus_irq_work = IRQ_WORK_INIT_HARD(kick_cpus_irq_workfn);
 
 	if (cpu_online(cpu))
 		cpu_rq(cpu)->scx.flags |= SCX_RQ_ONLINE;
@@ -6401,7 +6401,7 @@
 
 	guard(rcu)();
 
-	sch = rcu_dereference(sch);
+	sch = rcu_dereference(scx_root);
 	if (unlikely(!sch))
 		return;
 
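
As background for the scx_dump_state() change above (rq_lock() to rq_lock_irqsave()): the _irqsave/_irqrestore lock variants save and restore the caller's interrupt state instead of unconditionally re-enabling IRQs on unlock, which makes them the safe choice for a path that may be entered with interrupts already disabled. A minimal sketch of that pattern on a generic lock, using a hypothetical demo_lock and demo_dump_one() rather than the kernel's rq locking helpers:

#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(demo_lock);

/*
 * Hypothetical helper that may be called with IRQs enabled or disabled.
 * raw_spin_lock_irqsave() records the current IRQ state in 'flags' and
 * raw_spin_unlock_irqrestore() puts it back exactly as it was, so a
 * caller that entered with IRQs off does not get them re-enabled behind
 * its back, which the plain _irq variants would do on unlock.
 */
static void demo_dump_one(void)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&demo_lock, flags);
        /* ... inspect state protected by demo_lock ... */
        raw_spin_unlock_irqrestore(&demo_lock, flags);
}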