Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sched/core: Initialize the idle task with preemption disabled

As pointed out by commit

de9b8f5dcbd9 ("sched: Fix crash trying to dequeue/enqueue the idle thread")

init_idle() can and will be invoked more than once on the same idle
task. At boot time, it is invoked for the boot CPU thread by
sched_init(). Then smp_init() creates the threads for all the secondary
CPUs and invokes init_idle() on them.

As the hotplug machinery brings the secondaries to life, it will issue
calls to idle_thread_get(), which itself invokes init_idle() yet again.
In this case it's invoked twice more per secondary: at _cpu_up(), and at
bringup_cpu().

Given smp_init() already initializes the idle tasks for all *possible*
CPUs, no further initialization should be required. Now, removing
init_idle() from idle_thread_get() exposes some interesting expectations
with regards to the idle task's preempt_count: the secondary startup always
issues a preempt_disable(), requiring some reset of the preempt count to 0
between hot-unplug and hotplug, which is currently served by
idle_thread_get() -> init_idle().

Given the idle task is supposed to have preemption disabled once and never
see it re-enabled, it seems that what we actually want is to initialize its
preempt_count to PREEMPT_DISABLED and leave it there. Do that, and remove
init_idle() from idle_thread_get().

Secondary startups were patched via coccinelle:

@begone@
@@

-preempt_disable();
...
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);

Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20210512094636.2958515-1-valentin.schneider@arm.com

authored by

Valentin Schneider and committed by
Ingo Molnar
f1a0a376 9f269900

+8 -34
-1
arch/alpha/kernel/smp.c
··· 166 166 DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n", 167 167 cpuid, current, current->active_mm)); 168 168 169 - preempt_disable(); 170 169 cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); 171 170 } 172 171
-1
arch/arc/kernel/smp.c
··· 189 189 pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu); 190 190 191 191 local_irq_enable(); 192 - preempt_disable(); 193 192 cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); 194 193 } 195 194
-1
arch/arm/kernel/smp.c
··· 432 432 #endif 433 433 pr_debug("CPU%u: Booted secondary processor\n", cpu); 434 434 435 - preempt_disable(); 436 435 trace_hardirqs_off(); 437 436 438 437 /*
+1 -1
arch/arm64/include/asm/preempt.h
··· 23 23 } while (0) 24 24 25 25 #define init_idle_preempt_count(p, cpu) do { \ 26 - task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ 26 + task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \ 27 27 } while (0) 28 28 29 29 static inline void set_preempt_need_resched(void)
-1
arch/arm64/kernel/smp.c
··· 224 224 init_gic_priority_masking(); 225 225 226 226 rcu_cpu_starting(cpu); 227 - preempt_disable(); 228 227 trace_hardirqs_off(); 229 228 230 229 /*
-1
arch/csky/kernel/smp.c
··· 281 281 pr_info("CPU%u Online: %s...\n", cpu, __func__); 282 282 283 283 local_irq_enable(); 284 - preempt_disable(); 285 284 cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); 286 285 } 287 286
-1
arch/ia64/kernel/smpboot.c
··· 441 441 #endif 442 442 efi_map_pal_code(); 443 443 cpu_init(); 444 - preempt_disable(); 445 444 smp_callin(); 446 445 447 446 cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
-1
arch/mips/kernel/smp.c
··· 348 348 */ 349 349 350 350 calibrate_delay(); 351 - preempt_disable(); 352 351 cpu = smp_processor_id(); 353 352 cpu_data[cpu].udelay_val = loops_per_jiffy; 354 353
-2
arch/openrisc/kernel/smp.c
··· 145 145 set_cpu_online(cpu, true); 146 146 147 147 local_irq_enable(); 148 - 149 - preempt_disable(); 150 148 /* 151 149 * OK, it's off to the idle thread for us 152 150 */
-1
arch/parisc/kernel/smp.c
··· 302 302 #endif 303 303 304 304 smp_cpu_init(slave_id); 305 - preempt_disable(); 306 305 307 306 flush_cache_all_local(); /* start with known state */ 308 307 flush_tlb_all_local(NULL);
-1
arch/powerpc/kernel/smp.c
··· 1547 1547 smp_store_cpu_info(cpu); 1548 1548 set_dec(tb_ticks_per_jiffy); 1549 1549 rcu_cpu_starting(cpu); 1550 - preempt_disable(); 1551 1550 cpu_callin_map[cpu] = 1; 1552 1551 1553 1552 if (smp_ops->setup_cpu)
-1
arch/riscv/kernel/smpboot.c
··· 180 180 * Disable preemption before enabling interrupts, so we don't try to 181 181 * schedule a CPU that hasn't actually started yet. 182 182 */ 183 - preempt_disable(); 184 183 local_irq_enable(); 185 184 cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); 186 185 }
+2 -2
arch/s390/include/asm/preempt.h
··· 32 32 #define init_task_preempt_count(p) do { } while (0) 33 33 34 34 #define init_idle_preempt_count(p, cpu) do { \ 35 - S390_lowcore.preempt_count = PREEMPT_ENABLED; \ 35 + S390_lowcore.preempt_count = PREEMPT_DISABLED; \ 36 36 } while (0) 37 37 38 38 static inline void set_preempt_need_resched(void) ··· 91 91 #define init_task_preempt_count(p) do { } while (0) 92 92 93 93 #define init_idle_preempt_count(p, cpu) do { \ 94 - S390_lowcore.preempt_count = PREEMPT_ENABLED; \ 94 + S390_lowcore.preempt_count = PREEMPT_DISABLED; \ 95 95 } while (0) 96 96 97 97 static inline void set_preempt_need_resched(void)
-1
arch/s390/kernel/smp.c
··· 878 878 restore_access_regs(S390_lowcore.access_regs_save_area); 879 879 cpu_init(); 880 880 rcu_cpu_starting(cpu); 881 - preempt_disable(); 882 881 init_cpu_timer(); 883 882 vtime_init(); 884 883 vdso_getcpu_init();
-2
arch/sh/kernel/smp.c
··· 186 186 187 187 per_cpu_trap_init(); 188 188 189 - preempt_disable(); 190 - 191 189 notify_cpu_starting(cpu); 192 190 193 191 local_irq_enable();
-1
arch/sparc/kernel/smp_32.c
··· 348 348 */ 349 349 arch_cpu_pre_starting(arg); 350 350 351 - preempt_disable(); 352 351 cpu = smp_processor_id(); 353 352 354 353 notify_cpu_starting(cpu);
-3
arch/sparc/kernel/smp_64.c
··· 138 138 139 139 set_cpu_online(cpuid, true); 140 140 141 - /* idle thread is expected to have preempt disabled */ 142 - preempt_disable(); 143 - 144 141 local_irq_enable(); 145 142 146 143 cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+1 -1
arch/x86/include/asm/preempt.h
··· 44 44 #define init_task_preempt_count(p) do { } while (0) 45 45 46 46 #define init_idle_preempt_count(p, cpu) do { \ 47 - per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ 47 + per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ 48 48 } while (0) 49 49 50 50 /*
-1
arch/x86/kernel/smpboot.c
··· 236 236 cpu_init(); 237 237 rcu_cpu_starting(raw_smp_processor_id()); 238 238 x86_cpuinit.early_percpu_clock_init(); 239 - preempt_disable(); 240 239 smp_callin(); 241 240 242 241 enable_start_cpu0 = 0;
-1
arch/xtensa/kernel/smp.c
··· 145 145 cpumask_set_cpu(cpu, mm_cpumask(mm)); 146 146 enter_lazy_tlb(mm, current); 147 147 148 - preempt_disable(); 149 148 trace_hardirqs_off(); 150 149 151 150 calibrate_delay();
+1 -1
include/asm-generic/preempt.h
··· 29 29 } while (0) 30 30 31 31 #define init_idle_preempt_count(p, cpu) do { \ 32 - task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ 32 + task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \ 33 33 } while (0) 34 34 35 35 static __always_inline void set_preempt_need_resched(void)
+1 -5
init/main.c
··· 941 941 * time - but meanwhile we still have a functioning scheduler. 942 942 */ 943 943 sched_init(); 944 - /* 945 - * Disable preemption - early bootup scheduling is extremely 946 - * fragile until we cpu_idle() for the first time. 947 - */ 948 - preempt_disable(); 944 + 949 945 if (WARN(!irqs_disabled(), 950 946 "Interrupts were enabled *very* early, fixing it\n")) 951 947 local_irq_disable();
+1 -1
kernel/fork.c
··· 2412 2412 } 2413 2413 } 2414 2414 2415 - struct task_struct *fork_idle(int cpu) 2415 + struct task_struct * __init fork_idle(int cpu) 2416 2416 { 2417 2417 struct task_struct *task; 2418 2418 struct kernel_clone_args args = {
+1 -1
kernel/sched/core.c
··· 8227 8227 * NOTE: this function does not set the idle thread's NEED_RESCHED 8228 8228 * flag, to make booting more robust. 8229 8229 */ 8230 - void init_idle(struct task_struct *idle, int cpu) 8230 + void __init init_idle(struct task_struct *idle, int cpu) 8231 8231 { 8232 8232 struct rq *rq = cpu_rq(cpu); 8233 8233 unsigned long flags;
-1
kernel/smpboot.c
··· 33 33 34 34 if (!tsk) 35 35 return ERR_PTR(-ENOMEM); 36 - init_idle(tsk, cpu); 37 36 return tsk; 38 37 } 39 38