Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 perf updates from Ingo Molnar:
"This series tightens up RDPMC permissions: currently even highly
sandboxed x86 execution environments (such as seccomp) have permission
to execute RDPMC, which may leak various perf events / PMU state such
as timing information and other CPU execution details.

This 'all is allowed' RDPMC mode is still preserved as the
(non-default) /sys/devices/cpu/rdpmc=2 setting. The new default is
that RDPMC access is only allowed if a perf event is mmap-ed (which is
needed to correctly interpret RDPMC counter values in any case).

As a side effect of these changes CR4 handling is cleaned up in the
x86 code and a shadow copy of the CR4 value is added.

The extra CR4 manipulation adds less than ~50ns to the context switch
cost between rdpmc-capable and rdpmc-non-capable mms."

* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86: Add /sys/devices/cpu/rdpmc=2 to allow rdpmc for all tasks
perf/x86: Only allow rdpmc if a perf_event is mapped
perf: Pass the event to arch_perf_update_userpage()
perf: Add pmu callbacks to track event mapping and unmapping
x86: Add a comment clarifying LDT context switching
x86: Store a per-cpu shadow copy of CR4
x86: Clean up cr4 manipulation

+253 -120
+2
arch/x86/include/asm/mmu.h
··· 19 19 20 20 struct mutex lock; 21 21 void __user *vdso; 22 + 23 + atomic_t perf_rdpmc_allowed; /* nonzero if rdpmc is allowed */ 22 24 } mm_context_t; 23 25 24 26 #ifdef CONFIG_SMP
+27 -6
arch/x86/include/asm/mmu_context.h
··· 18 18 } 19 19 #endif /* !CONFIG_PARAVIRT */ 20 20 21 + #ifdef CONFIG_PERF_EVENTS 22 + extern struct static_key rdpmc_always_available; 23 + 24 + static inline void load_mm_cr4(struct mm_struct *mm) 25 + { 26 + if (static_key_true(&rdpmc_always_available) || 27 + atomic_read(&mm->context.perf_rdpmc_allowed)) 28 + cr4_set_bits(X86_CR4_PCE); 29 + else 30 + cr4_clear_bits(X86_CR4_PCE); 31 + } 32 + #else 33 + static inline void load_mm_cr4(struct mm_struct *mm) {} 34 + #endif 35 + 21 36 /* 22 37 * Used for LDT copy/destruction. 23 38 */ ··· 67 52 /* Stop flush ipis for the previous mm */ 68 53 cpumask_clear_cpu(cpu, mm_cpumask(prev)); 69 54 55 + /* Load per-mm CR4 state */ 56 + load_mm_cr4(next); 57 + 70 58 /* 71 59 * Load the LDT, if the LDT is different. 72 60 * 73 - * It's possible leave_mm(prev) has been called. If so, 74 - * then prev->context.ldt could be out of sync with the 75 - * LDT descriptor or the LDT register. This can only happen 76 - * if prev->context.ldt is non-null, since we never free 77 - * an LDT. But LDTs can't be shared across mms, so 78 - * prev->context.ldt won't be equal to next->context.ldt. 61 + * It's possible that prev->context.ldt doesn't match 62 + * the LDT register. This can happen if leave_mm(prev) 63 + * was called and then modify_ldt changed 64 + * prev->context.ldt but suppressed an IPI to this CPU. 65 + * In this case, prev->context.ldt != NULL, because we 66 + * never free an LDT while the mm still exists. That 67 + * means that next->context.ldt != prev->context.ldt, 68 + * because mms never share an LDT. 79 69 */ 80 70 if (unlikely(prev->context.ldt != next->context.ldt)) 81 71 load_LDT_nolock(&next->context); ··· 105 85 */ 106 86 load_cr3(next->pgd); 107 87 trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); 88 + load_mm_cr4(next); 108 89 load_LDT_nolock(&next->context); 109 90 } 110 91 }
+3 -3
arch/x86/include/asm/paravirt.h
··· 80 80 PVOP_VCALL1(pv_mmu_ops.write_cr3, x); 81 81 } 82 82 83 - static inline unsigned long read_cr4(void) 83 + static inline unsigned long __read_cr4(void) 84 84 { 85 85 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); 86 86 } 87 - static inline unsigned long read_cr4_safe(void) 87 + static inline unsigned long __read_cr4_safe(void) 88 88 { 89 89 return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe); 90 90 } 91 91 92 - static inline void write_cr4(unsigned long x) 92 + static inline void __write_cr4(unsigned long x) 93 93 { 94 94 PVOP_VCALL1(pv_cpu_ops.write_cr4, x); 95 95 }
-33
arch/x86/include/asm/processor.h
··· 579 579 #define set_iopl_mask native_set_iopl_mask 580 580 #endif /* CONFIG_PARAVIRT */ 581 581 582 - /* 583 - * Save the cr4 feature set we're using (ie 584 - * Pentium 4MB enable and PPro Global page 585 - * enable), so that any CPU's that boot up 586 - * after us can get the correct flags. 587 - */ 588 - extern unsigned long mmu_cr4_features; 589 - extern u32 *trampoline_cr4_features; 590 - 591 - static inline void set_in_cr4(unsigned long mask) 592 - { 593 - unsigned long cr4; 594 - 595 - mmu_cr4_features |= mask; 596 - if (trampoline_cr4_features) 597 - *trampoline_cr4_features = mmu_cr4_features; 598 - cr4 = read_cr4(); 599 - cr4 |= mask; 600 - write_cr4(cr4); 601 - } 602 - 603 - static inline void clear_in_cr4(unsigned long mask) 604 - { 605 - unsigned long cr4; 606 - 607 - mmu_cr4_features &= ~mask; 608 - if (trampoline_cr4_features) 609 - *trampoline_cr4_features = mmu_cr4_features; 610 - cr4 = read_cr4(); 611 - cr4 &= ~mask; 612 - write_cr4(cr4); 613 - } 614 - 615 582 typedef struct { 616 583 unsigned long seg; 617 584 } mm_segment_t;
+3 -3
arch/x86/include/asm/special_insns.h
··· 137 137 native_write_cr3(x); 138 138 } 139 139 140 - static inline unsigned long read_cr4(void) 140 + static inline unsigned long __read_cr4(void) 141 141 { 142 142 return native_read_cr4(); 143 143 } 144 144 145 - static inline unsigned long read_cr4_safe(void) 145 + static inline unsigned long __read_cr4_safe(void) 146 146 { 147 147 return native_read_cr4_safe(); 148 148 } 149 149 150 - static inline void write_cr4(unsigned long x) 150 + static inline void __write_cr4(unsigned long x) 151 151 { 152 152 native_write_cr4(x); 153 153 }
+70 -7
arch/x86/include/asm/tlbflush.h
··· 15 15 #define __flush_tlb_single(addr) __native_flush_tlb_single(addr) 16 16 #endif 17 17 18 + struct tlb_state { 19 + #ifdef CONFIG_SMP 20 + struct mm_struct *active_mm; 21 + int state; 22 + #endif 23 + 24 + /* 25 + * Access to this CR4 shadow and to H/W CR4 is protected by 26 + * disabling interrupts when modifying either one. 27 + */ 28 + unsigned long cr4; 29 + }; 30 + DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); 31 + 32 + /* Initialize cr4 shadow for this CPU. */ 33 + static inline void cr4_init_shadow(void) 34 + { 35 + this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); 36 + } 37 + 38 + /* Set in this cpu's CR4. */ 39 + static inline void cr4_set_bits(unsigned long mask) 40 + { 41 + unsigned long cr4; 42 + 43 + cr4 = this_cpu_read(cpu_tlbstate.cr4); 44 + if ((cr4 | mask) != cr4) { 45 + cr4 |= mask; 46 + this_cpu_write(cpu_tlbstate.cr4, cr4); 47 + __write_cr4(cr4); 48 + } 49 + } 50 + 51 + /* Clear in this cpu's CR4. */ 52 + static inline void cr4_clear_bits(unsigned long mask) 53 + { 54 + unsigned long cr4; 55 + 56 + cr4 = this_cpu_read(cpu_tlbstate.cr4); 57 + if ((cr4 & ~mask) != cr4) { 58 + cr4 &= ~mask; 59 + this_cpu_write(cpu_tlbstate.cr4, cr4); 60 + __write_cr4(cr4); 61 + } 62 + } 63 + 64 + /* Read the CR4 shadow. */ 65 + static inline unsigned long cr4_read_shadow(void) 66 + { 67 + return this_cpu_read(cpu_tlbstate.cr4); 68 + } 69 + 70 + /* 71 + * Save some of cr4 feature set we're using (e.g. Pentium 4MB 72 + * enable and PPro Global page enable), so that any CPU's that boot 73 + * up after us can get the correct flags. This should only be used 74 + * during boot on the boot cpu. 
75 + */ 76 + extern unsigned long mmu_cr4_features; 77 + extern u32 *trampoline_cr4_features; 78 + 79 + static inline void cr4_set_bits_and_update_boot(unsigned long mask) 80 + { 81 + mmu_cr4_features |= mask; 82 + if (trampoline_cr4_features) 83 + *trampoline_cr4_features = mmu_cr4_features; 84 + cr4_set_bits(mask); 85 + } 86 + 18 87 static inline void __native_flush_tlb(void) 19 88 { 20 89 native_write_cr3(native_read_cr3()); ··· 93 24 { 94 25 unsigned long cr4; 95 26 96 - cr4 = native_read_cr4(); 27 + cr4 = this_cpu_read(cpu_tlbstate.cr4); 97 28 /* clear PGE */ 98 29 native_write_cr4(cr4 & ~X86_CR4_PGE); 99 30 /* write old PGE again and flush TLBs */ ··· 252 183 253 184 #define TLBSTATE_OK 1 254 185 #define TLBSTATE_LAZY 2 255 - 256 - struct tlb_state { 257 - struct mm_struct *active_mm; 258 - int state; 259 - }; 260 - DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); 261 186 262 187 static inline void reset_lazy_tlbstate(void) 263 188 {
+3 -2
arch/x86/include/asm/virtext.h
··· 19 19 20 20 #include <asm/vmx.h> 21 21 #include <asm/svm.h> 22 + #include <asm/tlbflush.h> 22 23 23 24 /* 24 25 * VMX functions: ··· 41 40 static inline void cpu_vmxoff(void) 42 41 { 43 42 asm volatile (ASM_VMX_VMXOFF : : : "cc"); 44 - write_cr4(read_cr4() & ~X86_CR4_VMXE); 43 + cr4_clear_bits(X86_CR4_VMXE); 45 44 } 46 45 47 46 static inline int cpu_vmx_enabled(void) 48 47 { 49 - return read_cr4() & X86_CR4_VMXE; 48 + return __read_cr4() & X86_CR4_VMXE; 50 49 } 51 50 52 51 /** Disable VMX if it is enabled on the current CPU
+1 -1
arch/x86/kernel/acpi/sleep.c
··· 78 78 79 79 header->pmode_cr0 = read_cr0(); 80 80 if (__this_cpu_read(cpu_info.cpuid_level) >= 0) { 81 - header->pmode_cr4 = read_cr4(); 81 + header->pmode_cr4 = __read_cr4(); 82 82 header->pmode_behavior |= (1 << WAKEUP_BEHAVIOR_RESTORE_CR4); 83 83 } 84 84 if (!rdmsr_safe(MSR_IA32_MISC_ENABLE,
+12 -5
arch/x86/kernel/cpu/common.c
··· 19 19 #include <asm/archrandom.h> 20 20 #include <asm/hypervisor.h> 21 21 #include <asm/processor.h> 22 + #include <asm/tlbflush.h> 22 23 #include <asm/debugreg.h> 23 24 #include <asm/sections.h> 24 25 #include <asm/vsyscall.h> ··· 279 278 static __always_inline void setup_smep(struct cpuinfo_x86 *c) 280 279 { 281 280 if (cpu_has(c, X86_FEATURE_SMEP)) 282 - set_in_cr4(X86_CR4_SMEP); 281 + cr4_set_bits(X86_CR4_SMEP); 283 282 } 284 283 285 284 static __init int setup_disable_smap(char *arg) ··· 299 298 300 299 if (cpu_has(c, X86_FEATURE_SMAP)) { 301 300 #ifdef CONFIG_X86_SMAP 302 - set_in_cr4(X86_CR4_SMAP); 301 + cr4_set_bits(X86_CR4_SMAP); 303 302 #else 304 - clear_in_cr4(X86_CR4_SMAP); 303 + cr4_clear_bits(X86_CR4_SMAP); 305 304 #endif 306 305 } 307 306 } ··· 1296 1295 wait_for_master_cpu(cpu); 1297 1296 1298 1297 /* 1298 + * Initialize the CR4 shadow before doing anything that could 1299 + * try to read it. 1300 + */ 1301 + cr4_init_shadow(); 1302 + 1303 + /* 1299 1304 * Load microcode on this cpu if a valid microcode is available. 1300 1305 * This is early microcode loading procedure. 1301 1306 */ ··· 1320 1313 1321 1314 pr_debug("Initializing CPU#%d\n", cpu); 1322 1315 1323 - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1316 + cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1324 1317 1325 1318 /* 1326 1319 * Initialize the per-CPU GDT with the boot GDT, ··· 1401 1394 printk(KERN_INFO "Initializing CPU#%d\n", cpu); 1402 1395 1403 1396 if (cpu_feature_enabled(X86_FEATURE_VME) || cpu_has_tsc || cpu_has_de) 1404 - clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1397 + cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1405 1398 1406 1399 load_current_idt(); 1407 1400 switch_to_new_gdt(cpu);
+2 -1
arch/x86/kernel/cpu/mcheck/mce.c
··· 44 44 45 45 #include <asm/processor.h> 46 46 #include <asm/traps.h> 47 + #include <asm/tlbflush.h> 47 48 #include <asm/mce.h> 48 49 #include <asm/msr.h> 49 50 ··· 1453 1452 bitmap_fill(all_banks, MAX_NR_BANKS); 1454 1453 machine_check_poll(MCP_UC | m_fl, &all_banks); 1455 1454 1456 - set_in_cr4(X86_CR4_MCE); 1455 + cr4_set_bits(X86_CR4_MCE); 1457 1456 1458 1457 rdmsrl(MSR_IA32_MCG_CAP, cap); 1459 1458 if (cap & MCG_CTL_P)
+2 -1
arch/x86/kernel/cpu/mcheck/p5.c
··· 9 9 10 10 #include <asm/processor.h> 11 11 #include <asm/traps.h> 12 + #include <asm/tlbflush.h> 12 13 #include <asm/mce.h> 13 14 #include <asm/msr.h> 14 15 ··· 66 65 "Intel old style machine check architecture supported.\n"); 67 66 68 67 /* Enable MCE: */ 69 - set_in_cr4(X86_CR4_MCE); 68 + cr4_set_bits(X86_CR4_MCE); 70 69 printk(KERN_INFO 71 70 "Intel old style machine check reporting enabled on CPU#%d.\n", 72 71 smp_processor_id());
+2 -1
arch/x86/kernel/cpu/mcheck/winchip.c
··· 8 8 9 9 #include <asm/processor.h> 10 10 #include <asm/traps.h> 11 + #include <asm/tlbflush.h> 11 12 #include <asm/mce.h> 12 13 #include <asm/msr.h> 13 14 ··· 37 36 lo &= ~(1<<4); /* Enable MCE */ 38 37 wrmsr(MSR_IDT_FCR1, lo, hi); 39 38 40 - set_in_cr4(X86_CR4_MCE); 39 + cr4_set_bits(X86_CR4_MCE); 41 40 42 41 printk(KERN_INFO 43 42 "Winchip machine check reporting enabled on CPU#0.\n");
+3 -3
arch/x86/kernel/cpu/mtrr/cyrix.c
··· 138 138 139 139 /* Save value of CR4 and clear Page Global Enable (bit 7) */ 140 140 if (cpu_has_pge) { 141 - cr4 = read_cr4(); 142 - write_cr4(cr4 & ~X86_CR4_PGE); 141 + cr4 = __read_cr4(); 142 + __write_cr4(cr4 & ~X86_CR4_PGE); 143 143 } 144 144 145 145 /* ··· 171 171 172 172 /* Restore value of CR4 */ 173 173 if (cpu_has_pge) 174 - write_cr4(cr4); 174 + __write_cr4(cr4); 175 175 } 176 176 177 177 static void cyrix_set_arr(unsigned int reg, unsigned long base,
+3 -3
arch/x86/kernel/cpu/mtrr/generic.c
··· 678 678 679 679 /* Save value of CR4 and clear Page Global Enable (bit 7) */ 680 680 if (cpu_has_pge) { 681 - cr4 = read_cr4(); 682 - write_cr4(cr4 & ~X86_CR4_PGE); 681 + cr4 = __read_cr4(); 682 + __write_cr4(cr4 & ~X86_CR4_PGE); 683 683 } 684 684 685 685 /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ ··· 708 708 709 709 /* Restore value of CR4 */ 710 710 if (cpu_has_pge) 711 - write_cr4(cr4); 711 + __write_cr4(cr4); 712 712 raw_spin_unlock(&set_atomicity_lock); 713 713 } 714 714
+58 -18
arch/x86/kernel/cpu/perf_event.c
··· 31 31 #include <asm/nmi.h> 32 32 #include <asm/smp.h> 33 33 #include <asm/alternative.h> 34 + #include <asm/mmu_context.h> 35 + #include <asm/tlbflush.h> 34 36 #include <asm/timer.h> 35 37 #include <asm/desc.h> 36 38 #include <asm/ldt.h> ··· 44 42 DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { 45 43 .enabled = 1, 46 44 }; 45 + 46 + struct static_key rdpmc_always_available = STATIC_KEY_INIT_FALSE; 47 47 48 48 u64 __read_mostly hw_cache_event_ids 49 49 [PERF_COUNT_HW_CACHE_MAX] ··· 1331 1327 break; 1332 1328 1333 1329 case CPU_STARTING: 1334 - if (x86_pmu.attr_rdpmc) 1335 - set_in_cr4(X86_CR4_PCE); 1336 1330 if (x86_pmu.cpu_starting) 1337 1331 x86_pmu.cpu_starting(cpu); 1338 1332 break; ··· 1806 1804 event->destroy(event); 1807 1805 } 1808 1806 1807 + if (ACCESS_ONCE(x86_pmu.attr_rdpmc)) 1808 + event->hw.flags |= PERF_X86_EVENT_RDPMC_ALLOWED; 1809 + 1809 1810 return err; 1811 + } 1812 + 1813 + static void refresh_pce(void *ignored) 1814 + { 1815 + if (current->mm) 1816 + load_mm_cr4(current->mm); 1817 + } 1818 + 1819 + static void x86_pmu_event_mapped(struct perf_event *event) 1820 + { 1821 + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) 1822 + return; 1823 + 1824 + if (atomic_inc_return(&current->mm->context.perf_rdpmc_allowed) == 1) 1825 + on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); 1826 + } 1827 + 1828 + static void x86_pmu_event_unmapped(struct perf_event *event) 1829 + { 1830 + if (!current->mm) 1831 + return; 1832 + 1833 + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) 1834 + return; 1835 + 1836 + if (atomic_dec_and_test(&current->mm->context.perf_rdpmc_allowed)) 1837 + on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); 1810 1838 } 1811 1839 1812 1840 static int x86_pmu_event_idx(struct perf_event *event) 1813 1841 { 1814 1842 int idx = event->hw.idx; 1815 1843 1816 - if (!x86_pmu.attr_rdpmc) 1844 + if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) 1817 1845 return 0; 1818 1846 1819 1847 if 
(x86_pmu.num_counters_fixed && idx >= INTEL_PMC_IDX_FIXED) { ··· 1861 1829 return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc); 1862 1830 } 1863 1831 1864 - static void change_rdpmc(void *info) 1865 - { 1866 - bool enable = !!(unsigned long)info; 1867 - 1868 - if (enable) 1869 - set_in_cr4(X86_CR4_PCE); 1870 - else 1871 - clear_in_cr4(X86_CR4_PCE); 1872 - } 1873 - 1874 1832 static ssize_t set_attr_rdpmc(struct device *cdev, 1875 1833 struct device_attribute *attr, 1876 1834 const char *buf, size_t count) ··· 1872 1850 if (ret) 1873 1851 return ret; 1874 1852 1853 + if (val > 2) 1854 + return -EINVAL; 1855 + 1875 1856 if (x86_pmu.attr_rdpmc_broken) 1876 1857 return -ENOTSUPP; 1877 1858 1878 - if (!!val != !!x86_pmu.attr_rdpmc) { 1879 - x86_pmu.attr_rdpmc = !!val; 1880 - on_each_cpu(change_rdpmc, (void *)val, 1); 1859 + if ((val == 2) != (x86_pmu.attr_rdpmc == 2)) { 1860 + /* 1861 + * Changing into or out of always available, aka 1862 + * perf-event-bypassing mode. This path is extremely slow, 1863 + * but only root can trigger it, so it's okay. 
1864 + */ 1865 + if (val == 2) 1866 + static_key_slow_inc(&rdpmc_always_available); 1867 + else 1868 + static_key_slow_dec(&rdpmc_always_available); 1869 + on_each_cpu(refresh_pce, NULL, 1); 1881 1870 } 1871 + 1872 + x86_pmu.attr_rdpmc = val; 1882 1873 1883 1874 return count; 1884 1875 } ··· 1935 1900 1936 1901 .event_init = x86_pmu_event_init, 1937 1902 1903 + .event_mapped = x86_pmu_event_mapped, 1904 + .event_unmapped = x86_pmu_event_unmapped, 1905 + 1938 1906 .add = x86_pmu_add, 1939 1907 .del = x86_pmu_del, 1940 1908 .start = x86_pmu_start, ··· 1952 1914 .flush_branch_stack = x86_pmu_flush_branch_stack, 1953 1915 }; 1954 1916 1955 - void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) 1917 + void arch_perf_update_userpage(struct perf_event *event, 1918 + struct perf_event_mmap_page *userpg, u64 now) 1956 1919 { 1957 1920 struct cyc2ns_data *data; 1958 1921 1959 1922 userpg->cap_user_time = 0; 1960 1923 userpg->cap_user_time_zero = 0; 1961 - userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; 1924 + userpg->cap_user_rdpmc = 1925 + !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED); 1962 1926 userpg->pmc_width = x86_pmu.cntval_bits; 1963 1927 1964 1928 if (!sched_clock_stable())
+2
arch/x86/kernel/cpu/perf_event.h
··· 71 71 #define PERF_X86_EVENT_COMMITTED 0x8 /* event passed commit_txn */ 72 72 #define PERF_X86_EVENT_PEBS_LD_HSW 0x10 /* haswell style datala, load */ 73 73 #define PERF_X86_EVENT_PEBS_NA_HSW 0x20 /* haswell style datala, unknown */ 74 + #define PERF_X86_EVENT_RDPMC_ALLOWED 0x40 /* grant rdpmc permission */ 75 + 74 76 75 77 struct amd_nb { 76 78 int nb_id; /* NorthBridge id */
+1
arch/x86/kernel/head32.c
··· 31 31 32 32 asmlinkage __visible void __init i386_start_kernel(void) 33 33 { 34 + cr4_init_shadow(); 34 35 sanitize_boot_params(&boot_params); 35 36 36 37 /* Call the subarch specific early setup function */
+2
arch/x86/kernel/head64.c
··· 156 156 (__START_KERNEL & PGDIR_MASK))); 157 157 BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); 158 158 159 + cr4_init_shadow(); 160 + 159 161 /* Kill off the identity-map trampoline */ 160 162 reset_early_page_tables(); 161 163
+2 -1
arch/x86/kernel/i387.c
··· 13 13 #include <asm/sigcontext.h> 14 14 #include <asm/processor.h> 15 15 #include <asm/math_emu.h> 16 + #include <asm/tlbflush.h> 16 17 #include <asm/uaccess.h> 17 18 #include <asm/ptrace.h> 18 19 #include <asm/i387.h> ··· 194 193 if (cpu_has_xmm) 195 194 cr4_mask |= X86_CR4_OSXMMEXCPT; 196 195 if (cr4_mask) 197 - set_in_cr4(cr4_mask); 196 + cr4_set_bits(cr4_mask); 198 197 199 198 cr0 = read_cr0(); 200 199 cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
+3 -2
arch/x86/kernel/process.c
··· 28 28 #include <asm/fpu-internal.h> 29 29 #include <asm/debugreg.h> 30 30 #include <asm/nmi.h> 31 + #include <asm/tlbflush.h> 31 32 32 33 /* 33 34 * per-CPU TSS segments. Threads are completely 'soft' on Linux, ··· 142 141 143 142 static void hard_disable_TSC(void) 144 143 { 145 - write_cr4(read_cr4() | X86_CR4_TSD); 144 + cr4_set_bits(X86_CR4_TSD); 146 145 } 147 146 148 147 void disable_TSC(void) ··· 159 158 160 159 static void hard_enable_TSC(void) 161 160 { 162 - write_cr4(read_cr4() & ~X86_CR4_TSD); 161 + cr4_clear_bits(X86_CR4_TSD); 163 162 } 164 163 165 164 static void enable_TSC(void)
+1 -1
arch/x86/kernel/process_32.c
··· 101 101 cr0 = read_cr0(); 102 102 cr2 = read_cr2(); 103 103 cr3 = read_cr3(); 104 - cr4 = read_cr4_safe(); 104 + cr4 = __read_cr4_safe(); 105 105 printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", 106 106 cr0, cr2, cr3, cr4); 107 107
+1 -1
arch/x86/kernel/process_64.c
··· 93 93 cr0 = read_cr0(); 94 94 cr2 = read_cr2(); 95 95 cr3 = read_cr3(); 96 - cr4 = read_cr4(); 96 + cr4 = __read_cr4(); 97 97 98 98 printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", 99 99 fs, fsindex, gs, gsindex, shadowgs);
+1 -1
arch/x86/kernel/setup.c
··· 1179 1179 1180 1180 if (boot_cpu_data.cpuid_level >= 0) { 1181 1181 /* A CPU has %cr4 if and only if it has CPUID */ 1182 - mmu_cr4_features = read_cr4(); 1182 + mmu_cr4_features = __read_cr4(); 1183 1183 if (trampoline_cr4_features) 1184 1184 *trampoline_cr4_features = mmu_cr4_features; 1185 1185 }
+2 -1
arch/x86/kernel/xsave.c
··· 12 12 #include <asm/i387.h> 13 13 #include <asm/fpu-internal.h> 14 14 #include <asm/sigframe.h> 15 + #include <asm/tlbflush.h> 15 16 #include <asm/xcr.h> 16 17 17 18 /* ··· 454 453 */ 455 454 static inline void xstate_enable(void) 456 455 { 457 - set_in_cr4(X86_CR4_OSXSAVE); 456 + cr4_set_bits(X86_CR4_OSXSAVE); 458 457 xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); 459 458 } 460 459
+1 -1
arch/x86/kvm/svm.c
··· 1583 1583 1584 1584 static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 1585 1585 { 1586 - unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE; 1586 + unsigned long host_cr4_mce = cr4_read_shadow() & X86_CR4_MCE; 1587 1587 unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; 1588 1588 1589 1589 if (cr4 & X86_CR4_VMXE)
+5 -5
arch/x86/kvm/vmx.c
··· 2871 2871 u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); 2872 2872 u64 old, test_bits; 2873 2873 2874 - if (read_cr4() & X86_CR4_VMXE) 2874 + if (cr4_read_shadow() & X86_CR4_VMXE) 2875 2875 return -EBUSY; 2876 2876 2877 2877 INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); ··· 2898 2898 /* enable and lock */ 2899 2899 wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits); 2900 2900 } 2901 - write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ 2901 + cr4_set_bits(X86_CR4_VMXE); 2902 2902 2903 2903 if (vmm_exclusive) { 2904 2904 kvm_cpu_vmxon(phys_addr); ··· 2935 2935 vmclear_local_loaded_vmcss(); 2936 2936 kvm_cpu_vmxoff(); 2937 2937 } 2938 - write_cr4(read_cr4() & ~X86_CR4_VMXE); 2938 + cr4_clear_bits(X86_CR4_VMXE); 2939 2939 } 2940 2940 2941 2941 static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, ··· 4450 4450 vmcs_writel(HOST_CR3, read_cr3()); /* 22.2.3 FIXME: shadow tables */ 4451 4451 4452 4452 /* Save the most likely value for this task's CR4 in the VMCS. */ 4453 - cr4 = read_cr4(); 4453 + cr4 = cr4_read_shadow(); 4454 4454 vmcs_writel(HOST_CR4, cr4); /* 22.2.3, 22.2.5 */ 4455 4455 vmx->host_state.vmcs_host_cr4 = cr4; 4456 4456 ··· 8146 8146 if (test_bit(VCPU_REGS_RIP, (unsigned long *)&vcpu->arch.regs_dirty)) 8147 8147 vmcs_writel(GUEST_RIP, vcpu->arch.regs[VCPU_REGS_RIP]); 8148 8148 8149 - cr4 = read_cr4(); 8149 + cr4 = cr4_read_shadow(); 8150 8150 if (unlikely(cr4 != vmx->host_state.vmcs_host_cr4)) { 8151 8151 vmcs_writel(HOST_CR4, cr4); 8152 8152 vmx->host_state.vmcs_host_cr4 = cr4;
+1 -1
arch/x86/mm/fault.c
··· 600 600 printk(nx_warning, from_kuid(&init_user_ns, current_uid())); 601 601 if (pte && pte_present(*pte) && pte_exec(*pte) && 602 602 (pgd_flags(*pgd) & _PAGE_USER) && 603 - (read_cr4() & X86_CR4_SMEP)) 603 + (__read_cr4() & X86_CR4_SMEP)) 604 604 printk(smep_warning, from_kuid(&init_user_ns, current_uid())); 605 605 } 606 606
+11 -2
arch/x86/mm/init.c
··· 173 173 174 174 /* Enable PSE if available */ 175 175 if (cpu_has_pse) 176 - set_in_cr4(X86_CR4_PSE); 176 + cr4_set_bits_and_update_boot(X86_CR4_PSE); 177 177 178 178 /* Enable PGE if available */ 179 179 if (cpu_has_pge) { 180 - set_in_cr4(X86_CR4_PGE); 180 + cr4_set_bits_and_update_boot(X86_CR4_PGE); 181 181 __supported_pte_mask |= _PAGE_GLOBAL; 182 182 } 183 183 } ··· 712 712 713 713 free_area_init_nodes(max_zone_pfns); 714 714 } 715 + 716 + DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { 717 + #ifdef CONFIG_SMP 718 + .active_mm = &init_mm, 719 + .state = 0, 720 + #endif 721 + .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ 722 + }; 723 + EXPORT_SYMBOL_GPL(cpu_tlbstate); 715 724 716 725 void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) 717 726 {
-3
arch/x86/mm/tlb.c
··· 14 14 #include <asm/uv/uv.h> 15 15 #include <linux/debugfs.h> 16 16 17 - DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) 18 - = { &init_mm, 0, }; 19 - 20 17 /* 21 18 * Smarter SMP flushing macros. 22 19 * c/o Linus Torvalds.
+4 -7
arch/x86/power/cpu.c
··· 105 105 ctxt->cr0 = read_cr0(); 106 106 ctxt->cr2 = read_cr2(); 107 107 ctxt->cr3 = read_cr3(); 108 - #ifdef CONFIG_X86_32 109 - ctxt->cr4 = read_cr4_safe(); 110 - #else 111 - /* CONFIG_X86_64 */ 112 - ctxt->cr4 = read_cr4(); 108 + ctxt->cr4 = __read_cr4_safe(); 109 + #ifdef CONFIG_X86_64 113 110 ctxt->cr8 = read_cr8(); 114 111 #endif 115 112 ctxt->misc_enable_saved = !rdmsrl_safe(MSR_IA32_MISC_ENABLE, ··· 172 175 /* cr4 was introduced in the Pentium CPU */ 173 176 #ifdef CONFIG_X86_32 174 177 if (ctxt->cr4) 175 - write_cr4(ctxt->cr4); 178 + __write_cr4(ctxt->cr4); 176 179 #else 177 180 /* CONFIG X86_64 */ 178 181 wrmsrl(MSR_EFER, ctxt->efer); 179 182 write_cr8(ctxt->cr8); 180 - write_cr4(ctxt->cr4); 183 + __write_cr4(ctxt->cr4); 181 184 #endif 182 185 write_cr3(ctxt->cr3); 183 186 write_cr2(ctxt->cr2);
+1 -1
arch/x86/realmode/init.c
··· 81 81 82 82 trampoline_header->start = (u64) secondary_startup_64; 83 83 trampoline_cr4_features = &trampoline_header->cr4; 84 - *trampoline_cr4_features = read_cr4(); 84 + *trampoline_cr4_features = __read_cr4(); 85 85 86 86 trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); 87 87 trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd;
+2 -2
arch/x86/xen/enlighten.c
··· 1494 1494 * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu_init. 1495 1495 */ 1496 1496 if (cpu_has_pse) 1497 - set_in_cr4(X86_CR4_PSE); 1497 + cr4_set_bits_and_update_boot(X86_CR4_PSE); 1498 1498 1499 1499 if (cpu_has_pge) 1500 - set_in_cr4(X86_CR4_PGE); 1500 + cr4_set_bits_and_update_boot(X86_CR4_PGE); 1501 1501 } 1502 1502 1503 1503 /*
+3 -2
drivers/lguest/x86/core.c
··· 47 47 #include <asm/lguest.h> 48 48 #include <asm/uaccess.h> 49 49 #include <asm/i387.h> 50 + #include <asm/tlbflush.h> 50 51 #include "../lg.h" 51 52 52 53 static int cpu_had_pge; ··· 453 452 static void adjust_pge(void *on) 454 453 { 455 454 if (on) 456 - write_cr4(read_cr4() | X86_CR4_PGE); 455 + cr4_set_bits(X86_CR4_PGE); 457 456 else 458 - write_cr4(read_cr4() & ~X86_CR4_PGE); 457 + cr4_clear_bits(X86_CR4_PGE); 459 458 } 460 459 461 460 /*H:020
+7
include/linux/perf_event.h
··· 202 202 */ 203 203 int (*event_init) (struct perf_event *event); 204 204 205 + /* 206 + * Notification that the event was mapped or unmapped. Called 207 + * in the context of the mapping task. 208 + */ 209 + void (*event_mapped) (struct perf_event *event); /*optional*/ 210 + void (*event_unmapped) (struct perf_event *event); /*optional*/ 211 + 205 212 #define PERF_EF_START 0x01 /* start the counter when adding */ 206 213 #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ 207 214 #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */
+12 -2
kernel/events/core.c
··· 4101 4101 rcu_read_unlock(); 4102 4102 } 4103 4103 4104 - void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) 4104 + void __weak arch_perf_update_userpage( 4105 + struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) 4105 4106 { 4106 4107 } 4107 4108 ··· 4152 4151 userpg->time_running = running + 4153 4152 atomic64_read(&event->child_total_time_running); 4154 4153 4155 - arch_perf_update_userpage(userpg, now); 4154 + arch_perf_update_userpage(event, userpg, now); 4156 4155 4157 4156 barrier(); 4158 4157 ++userpg->lock; ··· 4294 4293 4295 4294 atomic_inc(&event->mmap_count); 4296 4295 atomic_inc(&event->rb->mmap_count); 4296 + 4297 + if (event->pmu->event_mapped) 4298 + event->pmu->event_mapped(event); 4297 4299 } 4298 4300 4299 4301 /* ··· 4315 4311 struct user_struct *mmap_user = rb->mmap_user; 4316 4312 int mmap_locked = rb->mmap_locked; 4317 4313 unsigned long size = perf_data_size(rb); 4314 + 4315 + if (event->pmu->event_unmapped) 4316 + event->pmu->event_unmapped(event); 4318 4317 4319 4318 atomic_dec(&rb->mmap_count); 4320 4319 ··· 4519 4512 */ 4520 4513 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; 4521 4514 vma->vm_ops = &perf_mmap_vmops; 4515 + 4516 + if (event->pmu->event_mapped) 4517 + event->pmu->event_mapped(event); 4522 4518 4523 4519 return ret; 4524 4520 }