Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86/i386: Make sure stack-protector segment base is cache aligned

The Intel Optimization Reference Guide says:

In Intel Atom microarchitecture, the address generation unit
assumes that the segment base will be 0 by default. Non-zero
segment base will cause load and store operations to experience
a delay.
- If the segment base isn't aligned to a cache line
boundary, the max throughput of memory operations is
reduced to one [e]very 9 cycles.
[...]
Assembly/Compiler Coding Rule 15. (H impact, ML generality)
For Intel Atom processors, use segments with base set to 0
whenever possible; avoid non-zero segment base address that is
not aligned to cache line boundary at all cost.

We can't avoid having a non-zero base for the stack-protector
segment, but we can make it cache-aligned.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: <stable@kernel.org>
LKML-Reference: <4AA01893.6000507@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>

Authored by Jeremy Fitzhardinge; committed by Ingo Molnar.
1ea0d14e 23386d63

+15 -6
+11 -1
arch/x86/include/asm/processor.h
··· 403 403 extern asmlinkage void ignore_sysret(void); 404 404 #else /* X86_64 */ 405 405 #ifdef CONFIG_CC_STACKPROTECTOR 406 - DECLARE_PER_CPU(unsigned long, stack_canary); 406 + /* 407 + * Make sure stack canary segment base is cached-aligned: 408 + * "For Intel Atom processors, avoid non zero segment base address 409 + * that is not aligned to cache line boundary at all cost." 410 + * (Optim Ref Manual Assembly/Compiler Coding Rule 15.) 411 + */ 412 + struct stack_canary { 413 + char __pad[20]; /* canary at %gs:20 */ 414 + unsigned long canary; 415 + }; 416 + DECLARE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned; 407 417 #endif 408 418 #endif /* X86_64 */ 409 419
+2 -2
arch/x86/include/asm/stackprotector.h
··· 78 78 #ifdef CONFIG_X86_64 79 79 percpu_write(irq_stack_union.stack_canary, canary); 80 80 #else 81 - percpu_write(stack_canary, canary); 81 + percpu_write(stack_canary.canary, canary); 82 82 #endif 83 83 } 84 84 85 85 static inline void setup_stack_canary_segment(int cpu) 86 86 { 87 87 #ifdef CONFIG_X86_32 88 - unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20; 88 + unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu); 89 89 struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); 90 90 struct desc_struct desc; 91 91
+1 -1
arch/x86/include/asm/system.h
··· 31 31 "movl %P[task_canary](%[next]), %%ebx\n\t" \ 32 32 "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" 33 33 #define __switch_canary_oparam \ 34 - , [stack_canary] "=m" (per_cpu_var(stack_canary)) 34 + , [stack_canary] "=m" (per_cpu_var(stack_canary.canary)) 35 35 #define __switch_canary_iparam \ 36 36 , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) 37 37 #else /* CC_STACKPROTECTOR */
+1 -1
arch/x86/kernel/cpu/common.c
··· 1043 1043 #else /* CONFIG_X86_64 */ 1044 1044 1045 1045 #ifdef CONFIG_CC_STACKPROTECTOR 1046 - DEFINE_PER_CPU(unsigned long, stack_canary); 1046 + DEFINE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned; 1047 1047 #endif 1048 1048 1049 1049 /* Make sure %fs and %gs are initialized properly in idle threads */
-1
arch/x86/kernel/head_32.S
··· 439 439 jne 1f 440 440 movl $per_cpu__gdt_page,%eax 441 441 movl $per_cpu__stack_canary,%ecx 442 - subl $20, %ecx 443 442 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) 444 443 shrl $16, %ecx 445 444 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)