Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86: Keep thread_info on thread stack in x86_32

x86_64 uses a per_cpu variable kernel_stack to always point to
the thread stack of current. This is where the thread_info is stored
and is accessed from this location even when the irq or exception stack
is in use. This removes the complexity of having to maintain the
thread info on the stack when interrupts are running and having to
copy the preempt_count and other fields to the interrupt stack.

x86_32 uses the old method of copying the thread_info from the thread
stack to the exception stack just before executing the exception.

Having the two different methods requires #ifdefs, and the x86_32 way
is also a bit of a pain to maintain. By converting x86_32 to the same
method as x86_64, we can remove #ifdefs, clean up the x86_32 code
a little, and remove the overhead of the copy.

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/20110806012354.263834829@goodmis.org
Link: http://lkml.kernel.org/r/20140206144321.852942014@goodmis.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>

authored by

Steven Rostedt and committed by
H. Peter Anvin
198d208d 0788aa6a

+89 -100
+9
arch/x86/include/asm/processor.h
··· 449 449 }; 450 450 DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); 451 451 #endif 452 + /* 453 + * per-CPU IRQ handling stacks 454 + */ 455 + struct irq_stack { 456 + u32 stack[THREAD_SIZE/sizeof(u32)]; 457 + } __aligned(THREAD_SIZE); 458 + 459 + DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); 460 + DECLARE_PER_CPU(struct irq_stack *, softirq_stack); 452 461 #endif /* X86_64 */ 453 462 454 463 extern unsigned int xstate_size;
+5 -44
arch/x86/include/asm/thread_info.h
··· 9 9 10 10 #include <linux/compiler.h> 11 11 #include <asm/page.h> 12 + #include <asm/percpu.h> 12 13 #include <asm/types.h> 13 14 14 15 /* ··· 35 34 void __user *sysenter_return; 36 35 unsigned int sig_on_uaccess_error:1; 37 36 unsigned int uaccess_err:1; /* uaccess failed */ 38 - #ifdef CONFIG_X86_32 39 - unsigned long previous_esp; /* ESP of the previous stack in 40 - case of nested (IRQ) stacks 41 - (Moved to end, to be removed soon) 42 - */ 43 - #endif 44 37 }; 45 38 46 39 #define INIT_THREAD_INFO(tsk) \ ··· 148 153 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) 149 154 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) 150 155 151 - #ifdef CONFIG_X86_32 156 + #define STACK_WARN (THREAD_SIZE/8) 157 + #define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8)) 152 158 153 - #define STACK_WARN (THREAD_SIZE/8) 154 159 /* 155 160 * macros/functions for gaining access to the thread information structure 156 161 * ··· 158 163 */ 159 164 #ifndef __ASSEMBLY__ 160 165 161 - #define current_stack_pointer ({ \ 162 - unsigned long sp; \ 163 - asm("mov %%esp,%0" : "=g" (sp)); \ 164 - sp; \ 165 - }) 166 - 167 - /* how to get the thread information struct from C */ 168 - static inline struct thread_info *current_thread_info(void) 169 - { 170 - return (struct thread_info *) 171 - (current_stack_pointer & ~(THREAD_SIZE - 1)); 172 - } 173 - 174 - #else /* !__ASSEMBLY__ */ 175 - 176 - /* how to get the thread information struct from ASM */ 177 - #define GET_THREAD_INFO(reg) \ 178 - movl $-THREAD_SIZE, reg; \ 179 - andl %esp, reg 180 - 181 - #endif 182 - 183 - #else /* X86_32 */ 184 - 185 - #include <asm/percpu.h> 186 - #define KERNEL_STACK_OFFSET (5*8) 187 - 188 - /* 189 - * macros/functions for gaining access to the thread information structure 190 - * preempt_count needs to be 1 initially, until the scheduler is functional. 
191 - */ 192 - #ifndef __ASSEMBLY__ 193 166 DECLARE_PER_CPU(unsigned long, kernel_stack); 194 167 195 168 static inline struct thread_info *current_thread_info(void) ··· 172 209 173 210 /* how to get the thread information struct from ASM */ 174 211 #define GET_THREAD_INFO(reg) \ 175 - movq PER_CPU_VAR(kernel_stack),reg ; \ 176 - subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg 212 + _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ 213 + _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ; 177 214 178 215 /* 179 216 * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in ··· 182 219 #define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg) 183 220 184 221 #endif 185 - 186 - #endif /* !X86_32 */ 187 222 188 223 /* 189 224 * Thread-synchronous status.
+4 -4
arch/x86/kernel/cpu/common.c
··· 1078 1078 } 1079 1079 __setup("clearcpuid=", setup_disablecpuid); 1080 1080 1081 + DEFINE_PER_CPU(unsigned long, kernel_stack) = 1082 + (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; 1083 + EXPORT_PER_CPU_SYMBOL(kernel_stack); 1084 + 1081 1085 #ifdef CONFIG_X86_64 1082 1086 struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; 1083 1087 struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, ··· 1097 1093 DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = 1098 1094 &init_task; 1099 1095 EXPORT_PER_CPU_SYMBOL(current_task); 1100 - 1101 - DEFINE_PER_CPU(unsigned long, kernel_stack) = 1102 - (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; 1103 - EXPORT_PER_CPU_SYMBOL(kernel_stack); 1104 1096 1105 1097 DEFINE_PER_CPU(char *, irq_stack_ptr) = 1106 1098 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+34 -7
arch/x86/kernel/dumpstack_32.c
··· 16 16 17 17 #include <asm/stacktrace.h> 18 18 19 + static void *is_irq_stack(void *p, void *irq) 20 + { 21 + if (p < irq || p >= (irq + THREAD_SIZE)) 22 + return NULL; 23 + return irq + THREAD_SIZE; 24 + } 25 + 26 + 27 + static void *is_hardirq_stack(unsigned long *stack, int cpu) 28 + { 29 + void *irq = per_cpu(hardirq_stack, cpu); 30 + 31 + return is_irq_stack(stack, irq); 32 + } 33 + 34 + static void *is_softirq_stack(unsigned long *stack, int cpu) 35 + { 36 + void *irq = per_cpu(softirq_stack, cpu); 37 + 38 + return is_irq_stack(stack, irq); 39 + } 19 40 20 41 void dump_trace(struct task_struct *task, struct pt_regs *regs, 21 42 unsigned long *stack, unsigned long bp, 22 43 const struct stacktrace_ops *ops, void *data) 23 44 { 45 + const unsigned cpu = get_cpu(); 24 46 int graph = 0; 25 47 u32 *prev_esp; 26 48 ··· 62 40 63 41 for (;;) { 64 42 struct thread_info *context; 43 + void *end_stack; 65 44 66 - context = (struct thread_info *) 67 - ((unsigned long)stack & (~(THREAD_SIZE - 1))); 68 - bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); 45 + end_stack = is_hardirq_stack(stack, cpu); 46 + if (!end_stack) 47 + end_stack = is_softirq_stack(stack, cpu); 48 + 49 + context = task_thread_info(task); 50 + bp = ops->walk_stack(context, stack, bp, ops, data, 51 + end_stack, &graph); 69 52 70 53 /* Stop if not on irq stack */ 71 - if (task_stack_page(task) == context) 54 + if (!end_stack) 72 55 break; 73 56 74 - /* The previous esp is just above the context */ 75 - prev_esp = (u32 *) ((char *)context + sizeof(struct thread_info) - 76 - sizeof(long)); 57 + /* The previous esp is saved on the bottom of the stack */ 58 + prev_esp = (u32 *)(end_stack - THREAD_SIZE); 77 59 stack = (unsigned long *)*prev_esp; 78 60 if (!stack) 79 61 break; ··· 86 60 break; 87 61 touch_nmi_watchdog(); 88 62 } 63 + put_cpu(); 89 64 } 90 65 EXPORT_SYMBOL(dump_trace); 91 66
+31 -43
arch/x86/kernel/irq_32.c
··· 55 55 static inline void print_stack_overflow(void) { } 56 56 #endif 57 57 58 - /* 59 - * per-CPU IRQ handling contexts (thread information and stack) 60 - */ 61 - union irq_ctx { 62 - struct thread_info tinfo; 63 - u32 stack[THREAD_SIZE/sizeof(u32)]; 64 - } __attribute__((aligned(THREAD_SIZE))); 65 - 66 - static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); 67 - static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); 58 + DEFINE_PER_CPU(struct irq_stack *, hardirq_stack); 59 + DEFINE_PER_CPU(struct irq_stack *, softirq_stack); 68 60 69 61 static void call_on_stack(void *func, void *stack) 70 62 { ··· 69 77 : "memory", "cc", "edx", "ecx", "eax"); 70 78 } 71 79 80 + /* how to get the current stack pointer from C */ 81 + register unsigned long current_stack_pointer asm("esp") __used; 82 + 83 + static inline void *current_stack(void) 84 + { 85 + return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); 86 + } 87 + 72 88 static inline int 73 89 execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) 74 90 { 75 - union irq_ctx *curctx, *irqctx; 91 + struct irq_stack *curstk, *irqstk; 76 92 u32 *isp, *prev_esp, arg1, arg2; 77 93 78 - curctx = (union irq_ctx *) current_thread_info(); 79 - irqctx = __this_cpu_read(hardirq_ctx); 94 + curstk = (struct irq_stack *) current_stack(); 95 + irqstk = __this_cpu_read(hardirq_stack); 80 96 81 97 /* 82 98 * this is where we switch to the IRQ stack. 
However, if we are ··· 92 92 * handler) we can't do that and just have to keep using the 93 93 * current stack (which is the irq stack already after all) 94 94 */ 95 - if (unlikely(curctx == irqctx)) 95 + if (unlikely(curstk == irqstk)) 96 96 return 0; 97 97 98 - /* build the stack frame on the IRQ stack */ 99 - isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); 100 - irqctx->tinfo.task = curctx->tinfo.task; 101 - /* Save the next esp after thread_info */ 102 - prev_esp = (u32 *) ((char *)irqctx + sizeof(struct thread_info) - 103 - sizeof(long)); 98 + isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); 99 + 100 + /* Save the next esp at the bottom of the stack */ 101 + prev_esp = (u32 *)irqstk; 104 102 *prev_esp = current_stack_pointer; 105 103 106 104 if (unlikely(overflow)) ··· 119 121 */ 120 122 void irq_ctx_init(int cpu) 121 123 { 122 - union irq_ctx *irqctx; 124 + struct irq_stack *irqstk; 123 125 124 - if (per_cpu(hardirq_ctx, cpu)) 126 + if (per_cpu(hardirq_stack, cpu)) 125 127 return; 126 128 127 - irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), 129 + irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), 128 130 THREADINFO_GFP, 129 131 THREAD_SIZE_ORDER)); 130 - memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); 131 - irqctx->tinfo.cpu = cpu; 132 - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); 132 + per_cpu(hardirq_stack, cpu) = irqstk; 133 133 134 - per_cpu(hardirq_ctx, cpu) = irqctx; 135 - 136 - irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), 134 + irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), 137 135 THREADINFO_GFP, 138 136 THREAD_SIZE_ORDER)); 139 - memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); 140 - irqctx->tinfo.cpu = cpu; 141 - irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); 142 - 143 - per_cpu(softirq_ctx, cpu) = irqctx; 137 + per_cpu(softirq_stack, cpu) = irqstk; 144 138 145 139 printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", 146 - cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); 140 + cpu, 
per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); 147 141 } 148 142 149 143 void do_softirq_own_stack(void) 150 144 { 151 - struct thread_info *curctx; 152 - union irq_ctx *irqctx; 145 + struct thread_info *curstk; 146 + struct irq_stack *irqstk; 153 147 u32 *isp, *prev_esp; 154 148 155 - curctx = current_thread_info(); 156 - irqctx = __this_cpu_read(softirq_ctx); 157 - irqctx->tinfo.task = curctx->task; 149 + curstk = current_stack(); 150 + irqstk = __this_cpu_read(softirq_stack); 158 151 159 152 /* build the stack frame on the softirq stack */ 160 - isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); 153 + isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); 161 154 162 155 /* Push the previous esp onto the stack */ 163 - prev_esp = (u32 *) ((char *)irqctx + sizeof(struct thread_info) - 164 - sizeof(long)); 156 + prev_esp = (u32 *)irqstk; 165 157 *prev_esp = current_stack_pointer; 166 158 167 159 call_on_stack(__do_softirq, isp);
+4
arch/x86/kernel/process_32.c
··· 314 314 */ 315 315 arch_end_context_switch(next_p); 316 316 317 + this_cpu_write(kernel_stack, 318 + (unsigned long)task_stack_page(next_p) + 319 + THREAD_SIZE - KERNEL_STACK_OFFSET); 320 + 317 321 /* 318 322 * Restore %gs if needed (which is common) 319 323 */
+1 -1
arch/x86/kernel/ptrace.c
··· 189 189 if (context == (sp & ~(THREAD_SIZE - 1))) 190 190 return sp; 191 191 192 - prev_esp = (u32 *)(context + sizeof(struct thread_info) - sizeof(long)); 192 + prev_esp = (u32 *)(context); 193 193 if (prev_esp) 194 194 return (unsigned long)prev_esp; 195 195
+1 -1
arch/x86/kernel/smpboot.c
··· 758 758 #else 759 759 clear_tsk_thread_flag(idle, TIF_FORK); 760 760 initial_gs = per_cpu_offset(cpu); 761 + #endif 761 762 per_cpu(kernel_stack, cpu) = 762 763 (unsigned long)task_stack_page(idle) - 763 764 KERNEL_STACK_OFFSET + THREAD_SIZE; 764 - #endif 765 765 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 766 766 initial_code = (unsigned long)start_secondary; 767 767 stack_start = idle->thread.sp;