Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sched_ext: Pass locked CPU parameter to scx_hardlockup() and add docs

With the buddy lockup detector, smp_processor_id() returns the detecting CPU,
not the locked CPU, making scx_hardlockup()'s printouts confusing. Pass the
locked CPU number from watchdog_hardlockup_check() as a parameter instead.

Also add kerneldoc comments to handle_lockup(), scx_hardlockup(), and
scx_rcu_cpu_stall() documenting their return value semantics.

Suggested-by: Doug Anderson <dianders@chromium.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Acked-by: Andrea Righi <arighi@nvidia.com>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Signed-off-by: Tejun Heo <tj@kernel.org>

Tejun Heo 1dcb98bb 67932f69

+25 -6
+2 -2
include/linux/sched/ext.h
··· 230 230 void sched_ext_dead(struct task_struct *p); 231 231 void print_scx_info(const char *log_lvl, struct task_struct *p); 232 232 void scx_softlockup(u32 dur_s); 233 - bool scx_hardlockup(void); 233 + bool scx_hardlockup(int cpu); 234 234 bool scx_rcu_cpu_stall(void); 235 235 236 236 #else /* !CONFIG_SCHED_CLASS_EXT */ ··· 238 238 static inline void sched_ext_dead(struct task_struct *p) {} 239 239 static inline void print_scx_info(const char *log_lvl, struct task_struct *p) {} 240 240 static inline void scx_softlockup(u32 dur_s) {} 241 - static inline bool scx_hardlockup(void) { return false; } 241 + static inline bool scx_hardlockup(int cpu) { return false; } 242 242 static inline bool scx_rcu_cpu_stall(void) { return false; } 243 243 244 244 #endif /* CONFIG_SCHED_CLASS_EXT */
+22 -3
kernel/sched/ext.c
··· 3687 3687 return false; 3688 3688 } 3689 3689 3690 + /** 3691 + * handle_lockup - sched_ext common lockup handler 3692 + * @fmt: format string 3693 + * 3694 + * Called on system stall or lockup condition and initiates abort of sched_ext 3695 + * if enabled, which may resolve the reported lockup. 3696 + * 3697 + * Returns %true if sched_ext is enabled and abort was initiated, which may 3698 + * resolve the lockup. %false if sched_ext is not enabled or abort was already 3699 + * initiated by someone else. 3700 + */ 3690 3701 static __printf(1, 2) bool handle_lockup(const char *fmt, ...) 3691 3702 { 3692 3703 struct scx_sched *sch; ··· 3729 3718 * that may not be caused by the current BPF scheduler, try kicking out the 3730 3719 * current scheduler in an attempt to recover the system to a good state before 3731 3720 * issuing panics. 3721 + * 3722 + * Returns %true if sched_ext is enabled and abort was initiated, which may 3723 + * resolve the reported RCU stall. %false if sched_ext is not enabled or someone 3724 + * else already initiated abort. 3732 3725 */ 3733 3726 bool scx_rcu_cpu_stall(void) 3734 3727 { ··· 3765 3750 * numerous affinitized tasks in a single queue and directing all CPUs at it. 3766 3751 * Try kicking out the current scheduler in an attempt to recover the system to 3767 3752 * a good state before taking more drastic actions. 3753 + * 3754 + * Returns %true if sched_ext is enabled and abort was initiated, which may 3755 + * resolve the reported hardlockup. %false if sched_ext is not enabled or 3756 + * someone else already initiated abort. 
3768 3757 */ 3769 - bool scx_hardlockup(void) 3758 + bool scx_hardlockup(int cpu) 3770 3759 { 3771 - if (!handle_lockup("hard lockup - CPU %d", smp_processor_id())) 3760 + if (!handle_lockup("hard lockup - CPU %d", cpu)) 3772 3761 return false; 3773 3762 3774 3763 printk_deferred(KERN_ERR "sched_ext: Hard lockup - CPU %d, disabling BPF scheduler\n", 3775 - smp_processor_id()); 3764 + cpu); 3776 3765 return true; 3777 3766 } 3778 3767
+1 -1
kernel/watchdog.c
··· 203 203 * only once when sched_ext is enabled and will immediately 204 204 * abort the BPF scheduler and print out a warning message. 205 205 */ 206 - if (scx_hardlockup()) 206 + if (scx_hardlockup(cpu)) 207 207 return; 208 208 209 209 /* Only print hardlockups once. */