Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/asm/entry: Replace this_cpu_sp0() with current_top_of_stack() and fix it on x86_32

I broke 32-bit kernels. The implementation of sp0 was correct
as far as I can tell, but sp0 was much weirder on x86_32 than I
realized. It has the following issues:

- Init's sp0 is inconsistent with everything else's: non-init tasks
are offset by 8 bytes. (I have no idea why, and the comment is unhelpful.)

- vm86 does crazy things to sp0.

Fix it up by replacing this_cpu_sp0() with
current_top_of_stack() and using a new percpu variable to track
the top of the stack on x86_32.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 75182b1632a8 ("x86/asm/entry: Switch all C consumers of kernel_stack to this_cpu_sp0()")
Link: http://lkml.kernel.org/r/d09dbe270883433776e0cbee3c7079433349e96d.1425692936.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by Andy Lutomirski, committed by Ingo Molnar.
Commit a7fcf28d (parent b27559a4 — roles presumed from the commit-page layout).

Overall diffstat: 6 files changed, +33 -12
+10 -1
arch/x86/include/asm/processor.h
@@ ... @@
 
 DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
 
+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#endif
+
 /*
  * Save the original ist values for checking stack pointers during debugging
  */
@@ ... @@
 #endif
 }
 
-static inline unsigned long this_cpu_sp0(void)
+static inline unsigned long current_top_of_stack(void)
 {
+#ifdef CONFIG_X86_64
 	return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
+#else
+	/* sp0 on x86_32 is special in and around vm86 mode. */
+	return this_cpu_read_stable(cpu_current_top_of_stack);
+#endif
 }
 
 #ifdef CONFIG_PARAVIRT
+1 -3
arch/x86/include/asm/thread_info.h
@@ ... @@
 
 static inline struct thread_info *current_thread_info(void)
 {
-	struct thread_info *ti;
-	ti = (void *)(this_cpu_sp0() - THREAD_SIZE);
-	return ti;
+	return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
 }
 
 static inline unsigned long current_stack_pointer(void)
+11 -2
arch/x86/kernel/cpu/common.c
@@ ... @@
 	irq_stack_union) __aligned(PAGE_SIZE) __visible;
 
 /*
- * The following four percpu variables are hot. Align current_task to
- * cacheline size such that all four fall in the same cacheline.
+ * The following percpu variables are hot. Align current_task to
+ * cacheline size such that they fall in the same cacheline.
  */
 DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
 	&init_task;
@@ ... @@
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+
+/*
+ * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
+ * the top of the kernel stack. Use an extra percpu variable to track the
+ * top of the kernel stack directly.
+ */
+DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
+	(unsigned long)&init_thread_union + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
 
 #ifdef CONFIG_CC_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
+7 -4
arch/x86/kernel/process_32.c
@@ ... @@
 	arch_end_context_switch(next_p);
 
 	/*
-	 * Reload esp0. This changes current_thread_info().
+	 * Reload esp0, kernel_stack, and current_top_of_stack. This changes
+	 * current_thread_info().
 	 */
 	load_sp0(tss, next);
-
 	this_cpu_write(kernel_stack,
-		  (unsigned long)task_stack_page(next_p) +
-		  THREAD_SIZE - KERNEL_STACK_OFFSET);
+		       (unsigned long)task_stack_page(next_p) +
+		       THREAD_SIZE - KERNEL_STACK_OFFSET);
+	this_cpu_write(cpu_current_top_of_stack,
+		       (unsigned long)task_stack_page(next_p) +
+		       THREAD_SIZE);
 
 	/*
 	 * Restore %gs if needed (which is common)
+2
arch/x86/kernel/smpboot.c
@@ ... @@
 #ifdef CONFIG_X86_32
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
+	per_cpu(cpu_current_top_of_stack, cpu) =
+		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
 #else
 	clear_tsk_thread_flag(idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
+2 -2
arch/x86/kernel/traps.c
@@ ... @@
 	 * will catch asm bugs and any attempt to use ist_preempt_enable
 	 * from double_fault.
 	 */
-	BUG_ON((unsigned long)(this_cpu_sp0() - current_stack_pointer()) >=
-	       THREAD_SIZE);
+	BUG_ON((unsigned long)(current_top_of_stack() -
+			       current_stack_pointer()) >= THREAD_SIZE);
 
 	preempt_count_sub(HARDIRQ_OFFSET);
 }