Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

x86: Store a per-cpu shadow copy of CR4

Context switches and TLB flushes can change individual bits of CR4.
CR4 reads take several cycles, so store a shadow copy of CR4 in a
per-cpu variable.

To avoid wasting a cache line, I added the CR4 shadow to
cpu_tlbstate, which is already touched in switch_mm. The heaviest
users of the cr4 shadow will be switch_mm and __switch_to_xtra, and
__switch_to_xtra is called shortly after switch_mm during context
switch, so the cacheline is likely to be hot.

Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Vince Weaver <vince@deater.net>
Cc: "hillf.zj" <hillf.zj@alibaba-inc.com>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/3a54dd3353fffbf84804398e00dfdc5b7c1afd7d.1414190806.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Authored by Andy Lutomirski and committed by Ingo Molnar
1e02ce4c 375074cc

+85 -46
+3 -3
arch/x86/include/asm/paravirt.h
··· 80 80 PVOP_VCALL1(pv_mmu_ops.write_cr3, x); 81 81 } 82 82 83 - static inline unsigned long read_cr4(void) 83 + static inline unsigned long __read_cr4(void) 84 84 { 85 85 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); 86 86 } 87 - static inline unsigned long read_cr4_safe(void) 87 + static inline unsigned long __read_cr4_safe(void) 88 88 { 89 89 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe); 90 90 } 91 91 92 - static inline void write_cr4(unsigned long x) 92 + static inline void __write_cr4(unsigned long x) 93 93 { 94 94 PVOP_VCALL1(pv_cpu_ops.write_cr4, x); 95 95 }
+3 -3
arch/x86/include/asm/special_insns.h
··· 137 137 native_write_cr3(x); 138 138 } 139 139 140 - static inline unsigned long read_cr4(void) 140 + static inline unsigned long __read_cr4(void) 141 141 { 142 142 return native_read_cr4(); 143 143 } 144 144 145 - static inline unsigned long read_cr4_safe(void) 145 + static inline unsigned long __read_cr4_safe(void) 146 146 { 147 147 return native_read_cr4_safe(); 148 148 } 149 149 150 - static inline void write_cr4(unsigned long x) 150 + static inline void __write_cr4(unsigned long x) 151 151 { 152 152 native_write_cr4(x); 153 153 }
+39 -13
arch/x86/include/asm/tlbflush.h
··· 15 15 #define __flush_tlb_single(addr) __native_flush_tlb_single(addr) 16 16 #endif 17 17 18 + struct tlb_state { 19 + #ifdef CONFIG_SMP 20 + struct mm_struct *active_mm; 21 + int state; 22 + #endif 23 + 24 + /* 25 + * Access to this CR4 shadow and to H/W CR4 is protected by 26 + * disabling interrupts when modifying either one. 27 + */ 28 + unsigned long cr4; 29 + }; 30 + DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); 31 + 32 + /* Initialize cr4 shadow for this CPU. */ 33 + static inline void cr4_init_shadow(void) 34 + { 35 + this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); 36 + } 37 + 18 38 /* Set in this cpu's CR4. */ 19 39 static inline void cr4_set_bits(unsigned long mask) 20 40 { 21 41 unsigned long cr4; 22 42 23 - cr4 = read_cr4(); 24 - cr4 |= mask; 25 - write_cr4(cr4); 43 + cr4 = this_cpu_read(cpu_tlbstate.cr4); 44 + if ((cr4 | mask) != cr4) { 45 + cr4 |= mask; 46 + this_cpu_write(cpu_tlbstate.cr4, cr4); 47 + __write_cr4(cr4); 48 + } 26 49 } 27 50 28 51 /* Clear in this cpu's CR4. */ ··· 53 30 { 54 31 unsigned long cr4; 55 32 56 - cr4 = read_cr4(); 57 - cr4 &= ~mask; 58 - write_cr4(cr4); 33 + cr4 = this_cpu_read(cpu_tlbstate.cr4); 34 + if ((cr4 & ~mask) != cr4) { 35 + cr4 &= ~mask; 36 + this_cpu_write(cpu_tlbstate.cr4, cr4); 37 + __write_cr4(cr4); 38 + } 39 + } 40 + 41 + /* Read the CR4 shadow. */ 42 + static inline unsigned long cr4_read_shadow(void) 43 + { 44 + return this_cpu_read(cpu_tlbstate.cr4); 59 45 }
+1 -1
arch/x86/include/asm/virtext.h
··· 46 46 47 47 static inline int cpu_vmx_enabled(void) 48 48 { 49 - return read_cr4() & X86_CR4_VMXE; 49 + return __read_cr4() & X86_CR4_VMXE; 50 50 } 51 51 52 52 /** Disable VMX if it is enabled on the current CPU
+1 -1
arch/x86/kernel/acpi/sleep.c
··· 78 78 79 79 header->pmode_cr0 = read_cr0(); 80 80 if (__this_cpu_read(cpu_info.cpuid_level) >= 0) { 81 - header->pmode_cr4 = read_cr4(); 81 + header->pmode_cr4 = __read_cr4(); 82 82 header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4); 83 83 } 84 84 if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
+7
arch/x86/kernel/cpu/common.c
··· 19 19 #include <asm/archrandom.h> 20 20 #include <asm/hypervisor.h> 21 21 #include <asm/processor.h> 22 + #include <asm/tlbflush.h> 22 23 #include <asm/debugreg.h> 23 24 #include <asm/sections.h> 24 25 #include <asm/vsyscall.h> ··· 1293 1292 int i; 1294 1293 1295 1294 wait_for_master_cpu(cpu); 1295 + 1296 + /* 1297 + * Initialize the CR4 shadow before doing anything that could 1298 + * try to read it. 1299 + */ 1300 + cr4_init_shadow(); 1296 1301 1297 1302 /* 1298 1303 * Load microcode on this cpu if a valid microcode is available.
+3 -3
arch/x86/kernel/cpu/mtrr/cyrix.c
··· 138 138 139 139 /* Save value of CR4 and clear Page Global Enable (bit 7) */ 140 140 if (cpu_has_pge) { 141 - cr4 = read_cr4(); 142 - write_cr4(cr4 & ~X86_CR4_PGE); 141 + cr4 = __read_cr4(); 142 + __write_cr4(cr4 & ~X86_CR4_PGE); 143 143 } 144 144 145 145 /* ··· 171 171 172 172 /* Restore value of CR4 */ 173 173 if (cpu_has_pge) 174 - write_cr4(cr4); 174 + __write_cr4(cr4); 175 175 } 176 176 177 177 static void cyrix_set_arr(unsigned int reg, unsigned long base,
+3 -3
arch/x86/kernel/cpu/mtrr/generic.c
··· 678 678 679 679 /* Save value of CR4 and clear Page Global Enable (bit 7) */ 680 680 if (cpu_has_pge) { 681 - cr4 = read_cr4(); 682 - write_cr4(cr4 & ~X86_CR4_PGE); 681 + cr4 = __read_cr4(); 682 + __write_cr4(cr4 & ~X86_CR4_PGE); 683 683 } 684 684 685 685 /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ ··· 708 708 709 709 /* Restore value of CR4 */ 710 710 if (cpu_has_pge) 711 - write_cr4(cr4); 711 + __write_cr4(cr4); 712 712 raw_spin_unlock(&set_atomicity_lock); 713 713 } 714 714
+1
arch/x86/kernel/head32.c
··· 31 31 32 32 asmlinkage __visible void __init i386_start_kernel(void) 33 33 { 34 + cr4_init_shadow(); 34 35 sanitize_boot_params(&boot_params); 35 36 36 37 /* Call the subarch specific early setup function */
+2
arch/x86/kernel/head64.c
··· 155 155 (__START_KERNEL & PGDIR_MASK))); 156 156 BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); 157 157 158 + cr4_init_shadow(); 159 + 158 160 /* Kill off the identity-map trampoline */ 159 161 reset_early_page_tables(); 160 162
+1 -1
arch/x86/kernel/process_32.c
··· 101 101 cr0 = read_cr0(); 102 102 cr2 = read_cr2(); 103 103 cr3 = read_cr3(); 104 - cr4 = read_cr4_safe(); 104 + cr4 = __read_cr4_safe(); 105 105 printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", 106 106 cr0, cr2, cr3, cr4); 107 107
+1 -1
arch/x86/kernel/process_64.c
··· 93 93 cr0 = read_cr0(); 94 94 cr2 = read_cr2(); 95 95 cr3 = read_cr3(); 96 - cr4 = read_cr4(); 96 + cr4 = __read_cr4(); 97 97 98 98 printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 99 99 fs, fsindex, gs, gsindex, shadowgs);
+1 -1
arch/x86/kernel/setup.c
··· 1178 1178 1179 1179 if (boot_cpu_data.cpuid_level >= 0) { 1180 1180 /* A CPU has %cr4 if and only if it has CPUID */ 1181 - mmu_cr4_features = read_cr4(); 1181 + mmu_cr4_features = __read_cr4(); 1182 1182 if (trampoline_cr4_features) 1183 1183 *trampoline_cr4_features = mmu_cr4_features; 1184 1184 }
+1 -1
arch/x86/kvm/svm.c
··· 1583 1583 1584 1584 static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1585 1585 { 1586 - unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE; 1586 + unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; 1587 1587 unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; 1588 1588 1589 1589 if (cr4 & X86_CR4_VMXE)
+3 -3
arch/x86/kvm/vmx.c
··· 2785 2785 u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); 2786 2786 u64 old, test_bits; 2787 2787 2788 - if (read_cr4() & X86_CR4_VMXE) 2788 + if (cr4_read_shadow() & X86_CR4_VMXE) 2789 2789 return -EBUSY; 2790 2790 2791 2791 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); ··· 4255 4255 vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ 4256 4256 4257 4257 /* Save the most likely value for this task's CR4 in the VMCS. */ 4258 - cr4 = read_cr4(); 4258 + cr4 = cr4_read_shadow(); 4259 4259 vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ 4260 4260 vmx->host_state.vmcs_host_cr4 = cr4; 4261 4261 ··· 7784 7784 if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) 7785 7785 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); 7786 7786 7787 - cr4 = read_cr4(); 7787 + cr4 = cr4_read_shadow(); 7788 7788 if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { 7789 7789 vmcs_writel(HOST_CR4, cr4); 7790 7790 vmx->host_state.vmcs_host_cr4 = cr4;
+1 -1
arch/x86/mm/fault.c
··· 600 600 printk(nx_warning, from_kuid(&init_user_ns, current_uid())); 601 601 if (pte && pte_present(*pte) && pte_exec(*pte) && 602 602 (pgd_flags(*pgd) & _PAGE_USER) && 603 - (read_cr4() & X86_CR4_SMEP)) 603 + (__read_cr4() & X86_CR4_SMEP)) 604 604 printk(smep_warning, from_kuid(&init_user_ns, current_uid())); 605 605 } 606 606
+9
arch/x86/mm/init.c
··· 713 713 free_area_init_nodes(max_zone_pfns); 714 714 } 715 715 716 + DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { 717 + #ifdef CONFIG_SMP 718 + .active_mm = &init_mm, 719 + .state = 0, 720 + #endif 721 + .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ 722 + }; 723 + EXPORT_SYMBOL_GPL(cpu_tlbstate); 724 + 716 725 void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) 717 726 { 718 727 /* entry 0 MUST be WB (hardwired to speed up translations) */
-3
arch/x86/mm/tlb.c
··· 14 14 #include <asm/uv/uv.h> 15 15 #include <linux/debugfs.h> 16 16 17 - DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) 18 - = { &init_mm, 0, }; 19 - 20 17 /* 21 18 * Smarter SMP flushing macros. 22 19 * c/o Linus Torvalds.
+4 -7
arch/x86/power/cpu.c
··· 105 105 ctxt->cr0 = read_cr0(); 106 106 ctxt->cr2 = read_cr2(); 107 107 ctxt->cr3 = read_cr3(); 108 - #ifdef CONFIG_X86_32 109 - ctxt->cr4 = read_cr4_safe(); 110 - #else 111 - /* CONFIG_X86_64 */ 112 - ctxt->cr4 = read_cr4(); 108 + ctxt->cr4 = __read_cr4_safe(); 109 + #ifdef CONFIG_X86_64 113 110 ctxt->cr8 = read_cr8(); 114 111 #endif 115 112 ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE, ··· 172 175 /* cr4 was introduced in the Pentium CPU */ 173 176 #ifdef CONFIG_X86_32 174 177 if (ctxt->cr4) 175 - write_cr4(ctxt->cr4); 178 + __write_cr4(ctxt->cr4); 176 179 #else 177 180 /* CONFIG X86_64 */ 178 181 wrmsrl(MSR_EFER, ctxt->efer); 179 182 write_cr8(ctxt->cr8); 180 - write_cr4(ctxt->cr4); 183 + __write_cr4(ctxt->cr4); 181 184 #endif 182 185 write_cr3(ctxt->cr3); 183 186 write_cr2(ctxt->cr2);
+1 -1
arch/x86/realmode/init.c
··· 81 81 82 82 trampoline_header->start = (u64) secondary_startup_64; 83 83 trampoline_cr4_features = &trampoline_header->cr4; 84 - *trampoline_cr4_features = read_cr4(); 84 + *trampoline_cr4_features = __read_cr4(); 85 85 86 86 trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); 87 87 trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;