Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

perf bench: Copy kernel files needed to build mem{cpy,set} x86_64 benchmarks

We can't access kernel files directly from tools/, so copy the required
bits, and make sure that we detect when the original files in the
kernel get modified.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-z7e76274ch5j4nugv048qacb@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

+939 -7
+316
tools/arch/x86/include/asm/cpufeatures.h
/*
 * tools/ copy of the kernel's arch/x86/include/asm/cpufeatures.h.
 * tools/ cannot include kernel headers directly, so the required bits
 * are copied here and checked for drift against the kernel original.
 */
#ifndef _ASM_X86_CPUFEATURES_H
#define _ASM_X86_CPUFEATURES_H

#ifndef _ASM_X86_REQUIRED_FEATURES_H
#include <asm/required-features.h>
#endif

#ifndef _ASM_X86_DISABLED_FEATURES_H
#include <asm/disabled-features.h>
#endif

/*
 * Defines x86 CPU feature bits
 */
#define NCAPINTS	18	/* N 32-bit words worth of info */
#define NBUGINTS	1	/* N 32-bit bug flags */

/*
 * Note: If the comment begins with a quoted string, that string is used
 * in /proc/cpuinfo instead of the macro name.  If the string is "",
 * this feature bit is not displayed in /proc/cpuinfo at all.
 */

/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
#define X86_FEATURE_FPU		( 0*32+ 0) /* Onboard FPU */
#define X86_FEATURE_VME		( 0*32+ 1) /* Virtual Mode Extensions */
#define X86_FEATURE_DE		( 0*32+ 2) /* Debugging Extensions */
#define X86_FEATURE_PSE		( 0*32+ 3) /* Page Size Extensions */
#define X86_FEATURE_TSC		( 0*32+ 4) /* Time Stamp Counter */
#define X86_FEATURE_MSR		( 0*32+ 5) /* Model-Specific Registers */
#define X86_FEATURE_PAE		( 0*32+ 6) /* Physical Address Extensions */
#define X86_FEATURE_MCE		( 0*32+ 7) /* Machine Check Exception */
#define X86_FEATURE_CX8		( 0*32+ 8) /* CMPXCHG8 instruction */
#define X86_FEATURE_APIC	( 0*32+ 9) /* Onboard APIC */
#define X86_FEATURE_SEP		( 0*32+11) /* SYSENTER/SYSEXIT */
#define X86_FEATURE_MTRR	( 0*32+12) /* Memory Type Range Registers */
#define X86_FEATURE_PGE		( 0*32+13) /* Page Global Enable */
#define X86_FEATURE_MCA		( 0*32+14) /* Machine Check Architecture */
#define X86_FEATURE_CMOV	( 0*32+15) /* CMOV instructions */
					   /* (plus FCMOVcc, FCOMI with FPU) */
#define X86_FEATURE_PAT		( 0*32+16) /* Page Attribute Table */
#define X86_FEATURE_PSE36	( 0*32+17) /* 36-bit PSEs */
#define X86_FEATURE_PN		( 0*32+18) /* Processor serial number */
#define X86_FEATURE_CLFLUSH	( 0*32+19) /* CLFLUSH instruction */
#define X86_FEATURE_DS		( 0*32+21) /* "dts" Debug Store */
#define X86_FEATURE_ACPI	( 0*32+22) /* ACPI via MSR */
#define X86_FEATURE_MMX		( 0*32+23) /* Multimedia Extensions */
#define X86_FEATURE_FXSR	( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
#define X86_FEATURE_XMM		( 0*32+25) /* "sse" */
#define X86_FEATURE_XMM2	( 0*32+26) /* "sse2" */
#define X86_FEATURE_SELFSNOOP	( 0*32+27) /* "ss" CPU self snoop */
#define X86_FEATURE_HT		( 0*32+28) /* Hyper-Threading */
#define X86_FEATURE_ACC		( 0*32+29) /* "tm" Automatic clock control */
#define X86_FEATURE_IA64	( 0*32+30) /* IA-64 processor */
#define X86_FEATURE_PBE		( 0*32+31) /* Pending Break Enable */

/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
/* Don't duplicate feature flags which are redundant with Intel! */
#define X86_FEATURE_SYSCALL	( 1*32+11) /* SYSCALL/SYSRET */
#define X86_FEATURE_MP		( 1*32+19) /* MP Capable. */
#define X86_FEATURE_NX		( 1*32+20) /* Execute Disable */
#define X86_FEATURE_MMXEXT	( 1*32+22) /* AMD MMX extensions */
#define X86_FEATURE_FXSR_OPT	( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
#define X86_FEATURE_GBPAGES	( 1*32+26) /* "pdpe1gb" GB pages */
#define X86_FEATURE_RDTSCP	( 1*32+27) /* RDTSCP */
#define X86_FEATURE_LM		( 1*32+29) /* Long Mode (x86-64) */
#define X86_FEATURE_3DNOWEXT	( 1*32+30) /* AMD 3DNow! extensions */
#define X86_FEATURE_3DNOW	( 1*32+31) /* 3DNow! */

/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
#define X86_FEATURE_RECOVERY	( 2*32+ 0) /* CPU in recovery mode */
#define X86_FEATURE_LONGRUN	( 2*32+ 1) /* Longrun power control */
#define X86_FEATURE_LRTI	( 2*32+ 3) /* LongRun table interface */

/* Other features, Linux-defined mapping, word 3 */
/* This range is used for feature bits which conflict or are synthesized */
#define X86_FEATURE_CXMMX	( 3*32+ 0) /* Cyrix MMX extensions */
#define X86_FEATURE_K6_MTRR	( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
#define X86_FEATURE_CYRIX_ARR	( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
#define X86_FEATURE_CENTAUR_MCR	( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
/* cpu types for specific tunings: */
#define X86_FEATURE_K8		( 3*32+ 4) /* "" Opteron, Athlon64 */
#define X86_FEATURE_K7		( 3*32+ 5) /* "" Athlon */
#define X86_FEATURE_P3		( 3*32+ 6) /* "" P3 */
#define X86_FEATURE_P4		( 3*32+ 7) /* "" P4 */
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
#define X86_FEATURE_UP		( 3*32+ 9) /* smp kernel running on up */
#define X86_FEATURE_ART		( 3*32+10) /* Platform has always running timer (ART) */
#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
#define X86_FEATURE_PEBS	( 3*32+12) /* Precise-Event Based Sampling */
#define X86_FEATURE_BTS		( 3*32+13) /* Branch Trace Store */
#define X86_FEATURE_SYSCALL32	( 3*32+14) /* "" syscall in ia32 userspace */
#define X86_FEATURE_SYSENTER32	( 3*32+15) /* "" sysenter in ia32 userspace */
#define X86_FEATURE_REP_GOOD	( 3*32+16) /* rep microcode works well */
#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER	( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL	( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS	( 3*32+21) /* "" Always-present feature */
#define X86_FEATURE_XTOPOLOGY	( 3*32+22) /* cpu topology enum extensions */
#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
#define X86_FEATURE_NONSTOP_TSC	( 3*32+24) /* TSC does not stop in C states */
/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */
#define X86_FEATURE_EXTD_APICID	( 3*32+26) /* has extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM	( 3*32+27) /* multi-node processor */
#define X86_FEATURE_APERFMPERF	( 3*32+28) /* APERFMPERF */
#define X86_FEATURE_EAGER_FPU	( 3*32+29) /* "eagerfpu" Non lazy FPU restore */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
#define X86_FEATURE_MCE_RECOVERY ( 3*32+31) /* cpu has recoverable machine checks */

/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3	( 4*32+ 0) /* "pni" SSE-3 */
#define X86_FEATURE_PCLMULQDQ	( 4*32+ 1) /* PCLMULQDQ instruction */
#define X86_FEATURE_DTES64	( 4*32+ 2) /* 64-bit Debug Store */
#define X86_FEATURE_MWAIT	( 4*32+ 3) /* "monitor" Monitor/Mwait support */
#define X86_FEATURE_DSCPL	( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
#define X86_FEATURE_VMX		( 4*32+ 5) /* Hardware virtualization */
#define X86_FEATURE_SMX		( 4*32+ 6) /* Safer mode */
#define X86_FEATURE_EST		( 4*32+ 7) /* Enhanced SpeedStep */
#define X86_FEATURE_TM2		( 4*32+ 8) /* Thermal Monitor 2 */
#define X86_FEATURE_SSSE3	( 4*32+ 9) /* Supplemental SSE-3 */
#define X86_FEATURE_CID		( 4*32+10) /* Context ID */
#define X86_FEATURE_SDBG	( 4*32+11) /* Silicon Debug */
#define X86_FEATURE_FMA		( 4*32+12) /* Fused multiply-add */
#define X86_FEATURE_CX16	( 4*32+13) /* CMPXCHG16B */
#define X86_FEATURE_XTPR	( 4*32+14) /* Send Task Priority Messages */
#define X86_FEATURE_PDCM	( 4*32+15) /* Performance Capabilities */
#define X86_FEATURE_PCID	( 4*32+17) /* Process Context Identifiers */
#define X86_FEATURE_DCA		( 4*32+18) /* Direct Cache Access */
#define X86_FEATURE_XMM4_1	( 4*32+19) /* "sse4_1" SSE-4.1 */
#define X86_FEATURE_XMM4_2	( 4*32+20) /* "sse4_2" SSE-4.2 */
#define X86_FEATURE_X2APIC	( 4*32+21) /* x2APIC */
#define X86_FEATURE_MOVBE	( 4*32+22) /* MOVBE instruction */
#define X86_FEATURE_POPCNT	( 4*32+23) /* POPCNT instruction */
#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
#define X86_FEATURE_AES		( 4*32+25) /* AES instructions */
#define X86_FEATURE_XSAVE	( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
#define X86_FEATURE_OSXSAVE	( 4*32+27) /* "" XSAVE enabled in the OS */
#define X86_FEATURE_AVX		( 4*32+28) /* Advanced Vector Extensions */
#define X86_FEATURE_F16C	( 4*32+29) /* 16-bit fp conversions */
#define X86_FEATURE_RDRAND	( 4*32+30) /* The RDRAND instruction */
#define X86_FEATURE_HYPERVISOR	( 4*32+31) /* Running on a hypervisor */

/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE	( 5*32+ 2) /* "rng" RNG present (xstore) */
#define X86_FEATURE_XSTORE_EN	( 5*32+ 3) /* "rng_en" RNG enabled */
#define X86_FEATURE_XCRYPT	( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
#define X86_FEATURE_XCRYPT_EN	( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
#define X86_FEATURE_ACE2	( 5*32+ 8) /* Advanced Cryptography Engine v2 */
#define X86_FEATURE_ACE2_EN	( 5*32+ 9) /* ACE v2 enabled */
#define X86_FEATURE_PHE		( 5*32+10) /* PadLock Hash Engine */
#define X86_FEATURE_PHE_EN	( 5*32+11) /* PHE enabled */
#define X86_FEATURE_PMM		( 5*32+12) /* PadLock Montgomery Multiplier */
#define X86_FEATURE_PMM_EN	( 5*32+13) /* PMM enabled */

/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
#define X86_FEATURE_LAHF_LM	( 6*32+ 0) /* LAHF/SAHF in long mode */
#define X86_FEATURE_CMP_LEGACY	( 6*32+ 1) /* If yes HyperThreading not valid */
#define X86_FEATURE_SVM		( 6*32+ 2) /* Secure virtual machine */
#define X86_FEATURE_EXTAPIC	( 6*32+ 3) /* Extended APIC space */
#define X86_FEATURE_CR8_LEGACY	( 6*32+ 4) /* CR8 in 32-bit mode */
#define X86_FEATURE_ABM		( 6*32+ 5) /* Advanced bit manipulation */
#define X86_FEATURE_SSE4A	( 6*32+ 6) /* SSE-4A */
#define X86_FEATURE_MISALIGNSSE	( 6*32+ 7) /* Misaligned SSE mode */
#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
#define X86_FEATURE_OSVW	( 6*32+ 9) /* OS Visible Workaround */
#define X86_FEATURE_IBS		( 6*32+10) /* Instruction Based Sampling */
#define X86_FEATURE_XOP		( 6*32+11) /* extended AVX instructions */
#define X86_FEATURE_SKINIT	( 6*32+12) /* SKINIT/STGI instructions */
#define X86_FEATURE_WDT		( 6*32+13) /* Watchdog timer */
#define X86_FEATURE_LWP		( 6*32+15) /* Light Weight Profiling */
#define X86_FEATURE_FMA4	( 6*32+16) /* 4 operands MAC instructions */
#define X86_FEATURE_TCE		( 6*32+17) /* translation cache extension */
#define X86_FEATURE_NODEID_MSR	( 6*32+19) /* NodeId MSR */
#define X86_FEATURE_TBM		( 6*32+21) /* trailing bit manipulations */
#define X86_FEATURE_TOPOEXT	( 6*32+22) /* topology extensions CPUID leafs */
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
#define X86_FEATURE_PERFCTR_NB	( 6*32+24) /* NB performance counter extensions */
#define X86_FEATURE_BPEXT	( 6*32+26) /* data breakpoint extension */
#define X86_FEATURE_PTSC	( 6*32+27) /* performance time-stamp counter */
#define X86_FEATURE_PERFCTR_L2	( 6*32+28) /* L2 performance counter extensions */
#define X86_FEATURE_MWAITX	( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */

/*
 * Auxiliary flags: Linux defined - For features scattered in various
 * CPUID levels like 0x6, 0xA etc, word 7.
 *
 * Reuse free bits when adding new feature flags!
 */

#define X86_FEATURE_CPB		( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB		( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */

#define X86_FEATURE_HW_PSTATE	( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */

#define X86_FEATURE_INTEL_PT	( 7*32+15) /* Intel Processor Trace */

/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW	( 8*32+ 0) /* Intel TPR Shadow */
#define X86_FEATURE_VNMI	( 8*32+ 1) /* Intel Virtual NMI */
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
#define X86_FEATURE_EPT		( 8*32+ 3) /* Intel Extended Page Table */
#define X86_FEATURE_VPID	( 8*32+ 4) /* Intel Virtual Processor ID */

#define X86_FEATURE_VMMCALL	( 8*32+15) /* Prefer vmmcall to vmcall */
#define X86_FEATURE_XENPV	( 8*32+16) /* "" Xen paravirtual guest */


/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
#define X86_FEATURE_FSGSBASE	( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
#define X86_FEATURE_TSC_ADJUST	( 9*32+ 1) /* TSC adjustment MSR 0x3b */
#define X86_FEATURE_BMI1	( 9*32+ 3) /* 1st group bit manipulation extensions */
#define X86_FEATURE_HLE		( 9*32+ 4) /* Hardware Lock Elision */
#define X86_FEATURE_AVX2	( 9*32+ 5) /* AVX2 instructions */
#define X86_FEATURE_SMEP	( 9*32+ 7) /* Supervisor Mode Execution Protection */
#define X86_FEATURE_BMI2	( 9*32+ 8) /* 2nd group bit manipulation extensions */
#define X86_FEATURE_ERMS	( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
#define X86_FEATURE_INVPCID	( 9*32+10) /* Invalidate Processor Context ID */
#define X86_FEATURE_RTM		( 9*32+11) /* Restricted Transactional Memory */
#define X86_FEATURE_CQM		( 9*32+12) /* Cache QoS Monitoring */
#define X86_FEATURE_MPX		( 9*32+14) /* Memory Protection Extension */
#define X86_FEATURE_AVX512F	( 9*32+16) /* AVX-512 Foundation */
#define X86_FEATURE_AVX512DQ	( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
#define X86_FEATURE_RDSEED	( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX		( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP	( 9*32+20) /* Supervisor Mode Access Prevention */
#define X86_FEATURE_PCOMMIT	( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT	( 9*32+23) /* CLFLUSHOPT instruction */
#define X86_FEATURE_CLWB	( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF	( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER	( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD	( 9*32+28) /* AVX-512 Conflict Detection */
#define X86_FEATURE_SHA_NI	( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
#define X86_FEATURE_AVX512BW	( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
#define X86_FEATURE_AVX512VL	( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */

/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
#define X86_FEATURE_XSAVEOPT	(10*32+ 0) /* XSAVEOPT */
#define X86_FEATURE_XSAVEC	(10*32+ 1) /* XSAVEC */
#define X86_FEATURE_XGETBV1	(10*32+ 2) /* XGETBV with ECX = 1 */
#define X86_FEATURE_XSAVES	(10*32+ 3) /* XSAVES/XRSTORS */

/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
#define X86_FEATURE_CQM_LLC	(11*32+ 1) /* LLC QoS if 1 */

/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */

/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO	(13*32+ 0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF	(13*32+ 1) /* Instructions Retired Count */

/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
#define X86_FEATURE_DTHERM	(14*32+ 0) /* Digital Thermal Sensor */
#define X86_FEATURE_IDA		(14*32+ 1) /* Intel Dynamic Acceleration */
#define X86_FEATURE_ARAT	(14*32+ 2) /* Always Running APIC Timer */
#define X86_FEATURE_PLN		(14*32+ 4) /* Intel Power Limit Notification */
#define X86_FEATURE_PTS		(14*32+ 6) /* Intel Package Thermal Status */
#define X86_FEATURE_HWP		(14*32+ 7) /* Intel Hardware P-states */
#define X86_FEATURE_HWP_NOTIFY	(14*32+ 8) /* HWP Notification */
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
#define X86_FEATURE_HWP_EPP	(14*32+10) /* HWP Energy Perf. Preference */
#define X86_FEATURE_HWP_PKG_REQ	(14*32+11) /* HWP Package Level Request */

/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
#define X86_FEATURE_NPT		(15*32+ 0) /* Nested Page Table support */
#define X86_FEATURE_LBRV	(15*32+ 1) /* LBR Virtualization support */
#define X86_FEATURE_SVML	(15*32+ 2) /* "svm_lock" SVM locking MSR */
#define X86_FEATURE_NRIPS	(15*32+ 3) /* "nrip_save" SVM next_rip save */
#define X86_FEATURE_TSCRATEMSR	(15*32+ 4) /* "tsc_scale" TSC scaling support */
#define X86_FEATURE_VMCBCLEAN	(15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
#define X86_FEATURE_FLUSHBYASID	(15*32+ 6) /* flush-by-ASID support */
#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
#define X86_FEATURE_PAUSEFILTER	(15*32+10) /* filtered pause intercept */
#define X86_FEATURE_PFTHRESHOLD	(15*32+12) /* pause filter threshold */
#define X86_FEATURE_AVIC	(15*32+13) /* Virtual Interrupt Controller */

/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
#define X86_FEATURE_PKU		(16*32+ 3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE	(16*32+ 4) /* OS Protection Keys Enable */

/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
#define X86_FEATURE_SUCCOR	(17*32+ 1) /* Uncorrectable error containment and recovery */
#define X86_FEATURE_SMCA	(17*32+ 3) /* Scalable MCA */

/*
 * BUG word(s)
 */
#define X86_BUG(x)		(NCAPINTS*32 + (x))

#define X86_BUG_F00F		X86_BUG(0) /* Intel F00F */
#define X86_BUG_FDIV		X86_BUG(1) /* FPU FDIV */
#define X86_BUG_COMA		X86_BUG(2) /* Cyrix 6x86 coma */
#define X86_BUG_AMD_TLB_MMATCH	X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
#define X86_BUG_AMD_APIC_C1E	X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
#define X86_BUG_11AP		X86_BUG(5) /* Bad local APIC aka 11AP */
#define X86_BUG_FXSAVE_LEAK	X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
#define X86_BUG_CLFLUSH_MONITOR	X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
#define X86_BUG_SYSRET_SS_ATTRS	X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
#define X86_BUG_NULL_SEG	X86_BUG(9) /* Nulling a selector preserves the base */
#define X86_BUG_SWAPGS_FENCE	X86_BUG(10) /* SWAPGS without input dep on GS */


#ifdef CONFIG_X86_32
/*
 * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
 * to avoid confusion.
 *
 * Fixed: this was X86_BUG(9), which collides with X86_BUG_NULL_SEG above
 * on 32-bit builds; bit 11 is the first free bug bit in this file.
 */
#define X86_BUG_ESPFIX		X86_BUG(11) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
#endif

#endif /* _ASM_X86_CPUFEATURES_H */
+60
tools/arch/x86/include/asm/disabled-features.h
#ifndef _ASM_X86_DISABLED_FEATURES_H
#define _ASM_X86_DISABLED_FEATURES_H

/* These features, although they might be available in a CPU
 * will not be used because the compile options to support
 * them are not present.
 *
 * This code allows them to be checked and disabled at
 * compile time without an explicit #ifdef.  Use
 * cpu_feature_enabled().
 */

/* Each DISABLE_* value is a single-bit mask within its 32-bit capability
 * word: X86_FEATURE_* & 31 selects the bit position inside the word, and
 * the DISABLED_MASKn below must use the mask in the matching word n. */

#ifdef CONFIG_X86_INTEL_MPX
# define DISABLE_MPX		0
#else
# define DISABLE_MPX		(1<<(X86_FEATURE_MPX & 31))
#endif

#ifdef CONFIG_X86_64
# define DISABLE_VME		(1<<(X86_FEATURE_VME & 31))
# define DISABLE_K6_MTRR	(1<<(X86_FEATURE_K6_MTRR & 31))
# define DISABLE_CYRIX_ARR	(1<<(X86_FEATURE_CYRIX_ARR & 31))
# define DISABLE_CENTAUR_MCR	(1<<(X86_FEATURE_CENTAUR_MCR & 31))
#else
# define DISABLE_VME		0
# define DISABLE_K6_MTRR	0
# define DISABLE_CYRIX_ARR	0
# define DISABLE_CENTAUR_MCR	0
#endif /* CONFIG_X86_64 */

#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
# define DISABLE_PKU		0
# define DISABLE_OSPKE		0
#else
# define DISABLE_PKU		(1<<(X86_FEATURE_PKU & 31))
# define DISABLE_OSPKE		(1<<(X86_FEATURE_OSPKE & 31))
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */

/*
 * Make sure to add features to the correct mask
 */
#define DISABLED_MASK0	(DISABLE_VME)
#define DISABLED_MASK1	0
#define DISABLED_MASK2	0
#define DISABLED_MASK3	(DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
#define DISABLED_MASK4	0
#define DISABLED_MASK5	0
#define DISABLED_MASK6	0
#define DISABLED_MASK7	0
#define DISABLED_MASK8	0
#define DISABLED_MASK9	(DISABLE_MPX)
#define DISABLED_MASK10	0
#define DISABLED_MASK11	0
#define DISABLED_MASK12	0
#define DISABLED_MASK13	0
#define DISABLED_MASK14	0
#define DISABLED_MASK15	0
#define DISABLED_MASK16	(DISABLE_PKU|DISABLE_OSPKE)


#endif /* _ASM_X86_DISABLED_FEATURES_H */
+103
tools/arch/x86/include/asm/required-features.h
#ifndef _ASM_X86_REQUIRED_FEATURES_H
#define _ASM_X86_REQUIRED_FEATURES_H

/* Define minimum CPUID feature set for kernel These bits are checked
   really early to actually display a visible error message before the
   kernel dies.  Make sure to assign features to the proper mask!

   Some requirements that are not in CPUID yet are also in the
   CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too.

   The real information is in arch/x86/Kconfig.cpu, this just converts
   the CONFIGs into a bitmask */

/* Each NEED_* macro is a single-bit mask within the feature's 32-bit
 * capability word: (X86_FEATURE_* & 31) is the bit index inside the word. */

#ifndef CONFIG_MATH_EMULATION
# define NEED_FPU	(1<<(X86_FEATURE_FPU & 31))
#else
# define NEED_FPU	0
#endif

#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
# define NEED_PAE	(1<<(X86_FEATURE_PAE & 31))
#else
# define NEED_PAE	0
#endif

#ifdef CONFIG_X86_CMPXCHG64
# define NEED_CX8	(1<<(X86_FEATURE_CX8 & 31))
#else
# define NEED_CX8	0
#endif

#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64)
# define NEED_CMOV	(1<<(X86_FEATURE_CMOV & 31))
#else
# define NEED_CMOV	0
#endif

#ifdef CONFIG_X86_USE_3DNOW
# define NEED_3DNOW	(1<<(X86_FEATURE_3DNOW & 31))
#else
# define NEED_3DNOW	0
#endif

#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
# define NEED_NOPL	(1<<(X86_FEATURE_NOPL & 31))
#else
# define NEED_NOPL	0
#endif

#ifdef CONFIG_MATOM
# define NEED_MOVBE	(1<<(X86_FEATURE_MOVBE & 31))
#else
# define NEED_MOVBE	0
#endif

#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT
/* Paravirtualized systems may not have PSE or PGE available */
#define NEED_PSE	0
#define NEED_PGE	0
#else
/*
 * Fixed: the parentheses were misplaced as (1<<(X86_FEATURE_PGE) & 31),
 * which masks the *shifted value* instead of the bit index.  With
 * X86_FEATURE_PGE == 13 that made NEED_PGE evaluate to (1<<13) & 31 == 0,
 * silently dropping the PGE requirement (NEED_PSE, bit 3, only worked by
 * coincidence).  Use the same (bit & 31) form as every other NEED_* macro.
 */
#define NEED_PSE	(1<<(X86_FEATURE_PSE & 31))
#define NEED_PGE	(1<<(X86_FEATURE_PGE & 31))
#endif
#define NEED_MSR	(1<<(X86_FEATURE_MSR & 31))
#define NEED_FXSR	(1<<(X86_FEATURE_FXSR & 31))
#define NEED_XMM	(1<<(X86_FEATURE_XMM & 31))
#define NEED_XMM2	(1<<(X86_FEATURE_XMM2 & 31))
#define NEED_LM		(1<<(X86_FEATURE_LM & 31))
#else
#define NEED_PSE	0
#define NEED_MSR	0
#define NEED_PGE	0
#define NEED_FXSR	0
#define NEED_XMM	0
#define NEED_XMM2	0
#define NEED_LM		0
#endif

#define REQUIRED_MASK0	(NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\
			 NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\
			 NEED_XMM|NEED_XMM2)
#define SSE_MASK	(NEED_XMM|NEED_XMM2)

#define REQUIRED_MASK1	(NEED_LM|NEED_3DNOW)

#define REQUIRED_MASK2	0
#define REQUIRED_MASK3	(NEED_NOPL)
#define REQUIRED_MASK4	(NEED_MOVBE)
#define REQUIRED_MASK5	0
#define REQUIRED_MASK6	0
#define REQUIRED_MASK7	0
#define REQUIRED_MASK8	0
#define REQUIRED_MASK9	0
#define REQUIRED_MASK10	0
#define REQUIRED_MASK11	0
#define REQUIRED_MASK12	0
#define REQUIRED_MASK13	0
#define REQUIRED_MASK14	0
#define REQUIRED_MASK15	0
#define REQUIRED_MASK16	0

#endif /* _ASM_X86_REQUIRED_FEATURES_H */
+297
tools/arch/x86/lib/memcpy_64.S
··· 1 + /* Copyright 2002 Andi Kleen */ 2 + 3 + #include <linux/linkage.h> 4 + #include <asm/errno.h> 5 + #include <asm/cpufeatures.h> 6 + #include <asm/alternative-asm.h> 7 + 8 + /* 9 + * We build a jump to memcpy_orig by default which gets NOPped out on 10 + * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which 11 + * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs 12 + * to a jmp to memcpy_erms which does the REP; MOVSB mem copy. 13 + */ 14 + 15 + .weak memcpy 16 + 17 + /* 18 + * memcpy - Copy a memory block. 19 + * 20 + * Input: 21 + * rdi destination 22 + * rsi source 23 + * rdx count 24 + * 25 + * Output: 26 + * rax original destination 27 + */ 28 + ENTRY(__memcpy) 29 + ENTRY(memcpy) 30 + ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ 31 + "jmp memcpy_erms", X86_FEATURE_ERMS 32 + 33 + movq %rdi, %rax 34 + movq %rdx, %rcx 35 + shrq $3, %rcx 36 + andl $7, %edx 37 + rep movsq 38 + movl %edx, %ecx 39 + rep movsb 40 + ret 41 + ENDPROC(memcpy) 42 + ENDPROC(__memcpy) 43 + 44 + /* 45 + * memcpy_erms() - enhanced fast string memcpy. This is faster and 46 + * simpler than memcpy. Use memcpy_erms when possible. 47 + */ 48 + ENTRY(memcpy_erms) 49 + movq %rdi, %rax 50 + movq %rdx, %rcx 51 + rep movsb 52 + ret 53 + ENDPROC(memcpy_erms) 54 + 55 + ENTRY(memcpy_orig) 56 + movq %rdi, %rax 57 + 58 + cmpq $0x20, %rdx 59 + jb .Lhandle_tail 60 + 61 + /* 62 + * We check whether memory false dependence could occur, 63 + * then jump to corresponding copy mode. 
64 + */ 65 + cmp %dil, %sil 66 + jl .Lcopy_backward 67 + subq $0x20, %rdx 68 + .Lcopy_forward_loop: 69 + subq $0x20, %rdx 70 + 71 + /* 72 + * Move in blocks of 4x8 bytes: 73 + */ 74 + movq 0*8(%rsi), %r8 75 + movq 1*8(%rsi), %r9 76 + movq 2*8(%rsi), %r10 77 + movq 3*8(%rsi), %r11 78 + leaq 4*8(%rsi), %rsi 79 + 80 + movq %r8, 0*8(%rdi) 81 + movq %r9, 1*8(%rdi) 82 + movq %r10, 2*8(%rdi) 83 + movq %r11, 3*8(%rdi) 84 + leaq 4*8(%rdi), %rdi 85 + jae .Lcopy_forward_loop 86 + addl $0x20, %edx 87 + jmp .Lhandle_tail 88 + 89 + .Lcopy_backward: 90 + /* 91 + * Calculate copy position to tail. 92 + */ 93 + addq %rdx, %rsi 94 + addq %rdx, %rdi 95 + subq $0x20, %rdx 96 + /* 97 + * At most 3 ALU operations in one cycle, 98 + * so append NOPS in the same 16 bytes trunk. 99 + */ 100 + .p2align 4 101 + .Lcopy_backward_loop: 102 + subq $0x20, %rdx 103 + movq -1*8(%rsi), %r8 104 + movq -2*8(%rsi), %r9 105 + movq -3*8(%rsi), %r10 106 + movq -4*8(%rsi), %r11 107 + leaq -4*8(%rsi), %rsi 108 + movq %r8, -1*8(%rdi) 109 + movq %r9, -2*8(%rdi) 110 + movq %r10, -3*8(%rdi) 111 + movq %r11, -4*8(%rdi) 112 + leaq -4*8(%rdi), %rdi 113 + jae .Lcopy_backward_loop 114 + 115 + /* 116 + * Calculate copy position to head. 117 + */ 118 + addl $0x20, %edx 119 + subq %rdx, %rsi 120 + subq %rdx, %rdi 121 + .Lhandle_tail: 122 + cmpl $16, %edx 123 + jb .Lless_16bytes 124 + 125 + /* 126 + * Move data from 16 bytes to 31 bytes. 127 + */ 128 + movq 0*8(%rsi), %r8 129 + movq 1*8(%rsi), %r9 130 + movq -2*8(%rsi, %rdx), %r10 131 + movq -1*8(%rsi, %rdx), %r11 132 + movq %r8, 0*8(%rdi) 133 + movq %r9, 1*8(%rdi) 134 + movq %r10, -2*8(%rdi, %rdx) 135 + movq %r11, -1*8(%rdi, %rdx) 136 + retq 137 + .p2align 4 138 + .Lless_16bytes: 139 + cmpl $8, %edx 140 + jb .Lless_8bytes 141 + /* 142 + * Move data from 8 bytes to 15 bytes. 
143 + */ 144 + movq 0*8(%rsi), %r8 145 + movq -1*8(%rsi, %rdx), %r9 146 + movq %r8, 0*8(%rdi) 147 + movq %r9, -1*8(%rdi, %rdx) 148 + retq 149 + .p2align 4 150 + .Lless_8bytes: 151 + cmpl $4, %edx 152 + jb .Lless_3bytes 153 + 154 + /* 155 + * Move data from 4 bytes to 7 bytes. 156 + */ 157 + movl (%rsi), %ecx 158 + movl -4(%rsi, %rdx), %r8d 159 + movl %ecx, (%rdi) 160 + movl %r8d, -4(%rdi, %rdx) 161 + retq 162 + .p2align 4 163 + .Lless_3bytes: 164 + subl $1, %edx 165 + jb .Lend 166 + /* 167 + * Move data from 1 bytes to 3 bytes. 168 + */ 169 + movzbl (%rsi), %ecx 170 + jz .Lstore_1byte 171 + movzbq 1(%rsi), %r8 172 + movzbq (%rsi, %rdx), %r9 173 + movb %r8b, 1(%rdi) 174 + movb %r9b, (%rdi, %rdx) 175 + .Lstore_1byte: 176 + movb %cl, (%rdi) 177 + 178 + .Lend: 179 + retq 180 + ENDPROC(memcpy_orig) 181 + 182 + #ifndef CONFIG_UML 183 + /* 184 + * memcpy_mcsafe - memory copy with machine check exception handling 185 + * Note that we only catch machine checks when reading the source addresses. 186 + * Writes to target are posted and don't generate machine checks. 187 + */ 188 + ENTRY(memcpy_mcsafe) 189 + cmpl $8, %edx 190 + /* Less than 8 bytes? 
Go to byte copy loop */ 191 + jb .L_no_whole_words 192 + 193 + /* Check for bad alignment of source */ 194 + testl $7, %esi 195 + /* Already aligned */ 196 + jz .L_8byte_aligned 197 + 198 + /* Copy one byte at a time until source is 8-byte aligned */ 199 + movl %esi, %ecx 200 + andl $7, %ecx 201 + subl $8, %ecx 202 + negl %ecx 203 + subl %ecx, %edx 204 + .L_copy_leading_bytes: 205 + movb (%rsi), %al 206 + movb %al, (%rdi) 207 + incq %rsi 208 + incq %rdi 209 + decl %ecx 210 + jnz .L_copy_leading_bytes 211 + 212 + .L_8byte_aligned: 213 + /* Figure out how many whole cache lines (64-bytes) to copy */ 214 + movl %edx, %ecx 215 + andl $63, %edx 216 + shrl $6, %ecx 217 + jz .L_no_whole_cache_lines 218 + 219 + /* Loop copying whole cache lines */ 220 + .L_cache_w0: movq (%rsi), %r8 221 + .L_cache_w1: movq 1*8(%rsi), %r9 222 + .L_cache_w2: movq 2*8(%rsi), %r10 223 + .L_cache_w3: movq 3*8(%rsi), %r11 224 + movq %r8, (%rdi) 225 + movq %r9, 1*8(%rdi) 226 + movq %r10, 2*8(%rdi) 227 + movq %r11, 3*8(%rdi) 228 + .L_cache_w4: movq 4*8(%rsi), %r8 229 + .L_cache_w5: movq 5*8(%rsi), %r9 230 + .L_cache_w6: movq 6*8(%rsi), %r10 231 + .L_cache_w7: movq 7*8(%rsi), %r11 232 + movq %r8, 4*8(%rdi) 233 + movq %r9, 5*8(%rdi) 234 + movq %r10, 6*8(%rdi) 235 + movq %r11, 7*8(%rdi) 236 + leaq 64(%rsi), %rsi 237 + leaq 64(%rdi), %rdi 238 + decl %ecx 239 + jnz .L_cache_w0 240 + 241 + /* Are there any trailing 8-byte words? */ 242 + .L_no_whole_cache_lines: 243 + movl %edx, %ecx 244 + andl $7, %edx 245 + shrl $3, %ecx 246 + jz .L_no_whole_words 247 + 248 + /* Copy trailing words */ 249 + .L_copy_trailing_words: 250 + movq (%rsi), %r8 251 + mov %r8, (%rdi) 252 + leaq 8(%rsi), %rsi 253 + leaq 8(%rdi), %rdi 254 + decl %ecx 255 + jnz .L_copy_trailing_words 256 + 257 + /* Any trailing bytes? 
*/ 258 + .L_no_whole_words: 259 + andl %edx, %edx 260 + jz .L_done_memcpy_trap 261 + 262 + /* Copy trailing bytes */ 263 + movl %edx, %ecx 264 + .L_copy_trailing_bytes: 265 + movb (%rsi), %al 266 + movb %al, (%rdi) 267 + incq %rsi 268 + incq %rdi 269 + decl %ecx 270 + jnz .L_copy_trailing_bytes 271 + 272 + /* Copy successful. Return zero */ 273 + .L_done_memcpy_trap: 274 + xorq %rax, %rax 275 + ret 276 + ENDPROC(memcpy_mcsafe) 277 + 278 + .section .fixup, "ax" 279 + /* Return -EFAULT for any failure */ 280 + .L_memcpy_mcsafe_fail: 281 + mov $-EFAULT, %rax 282 + ret 283 + 284 + .previous 285 + 286 + _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) 287 + _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) 288 + _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) 289 + _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) 290 + _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) 291 + _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) 292 + _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) 293 + _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail) 294 + _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail) 295 + _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail) 296 + _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) 297 + #endif
+138
tools/arch/x86/lib/memset_64.S
··· 1 + /* Copyright 2002 Andi Kleen, SuSE Labs */ 2 + 3 + #include <linux/linkage.h> 4 + #include <asm/cpufeatures.h> 5 + #include <asm/alternative-asm.h> 6 + 7 + .weak memset 8 + 9 + /* 10 + * ISO C memset - set a memory block to a byte value. This function uses fast 11 + * string to get better performance than the original function. The code is 12 + * simpler and shorter than the original function as well. 13 + * 14 + * rdi destination 15 + * rsi value (char) 16 + * rdx count (bytes) 17 + * 18 + * rax original destination 19 + */ 20 + ENTRY(memset) 21 + ENTRY(__memset) 22 + /* 23 + * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended 24 + * to use it when possible. If not available, use fast string instructions. 25 + * 26 + * Otherwise, use original memset function. 27 + */ 28 + ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \ 29 + "jmp memset_erms", X86_FEATURE_ERMS 30 + 31 + movq %rdi,%r9 32 + movq %rdx,%rcx 33 + andl $7,%edx 34 + shrq $3,%rcx 35 + /* expand byte value */ 36 + movzbl %sil,%esi 37 + movabs $0x0101010101010101,%rax 38 + imulq %rsi,%rax 39 + rep stosq 40 + movl %edx,%ecx 41 + rep stosb 42 + movq %r9,%rax 43 + ret 44 + ENDPROC(memset) 45 + ENDPROC(__memset) 46 + 47 + /* 48 + * ISO C memset - set a memory block to a byte value. This function uses 49 + * enhanced rep stosb to override the fast string function. 50 + * The code is simpler and shorter than the fast string function as well. 
51 + * 52 + * rdi destination 53 + * rsi value (char) 54 + * rdx count (bytes) 55 + * 56 + * rax original destination 57 + */ 58 + ENTRY(memset_erms) 59 + movq %rdi,%r9 60 + movb %sil,%al 61 + movq %rdx,%rcx 62 + rep stosb 63 + movq %r9,%rax 64 + ret 65 + ENDPROC(memset_erms) 66 + 67 + ENTRY(memset_orig) 68 + movq %rdi,%r10 69 + 70 + /* expand byte value */ 71 + movzbl %sil,%ecx 72 + movabs $0x0101010101010101,%rax 73 + imulq %rcx,%rax 74 + 75 + /* align dst */ 76 + movl %edi,%r9d 77 + andl $7,%r9d 78 + jnz .Lbad_alignment 79 + .Lafter_bad_alignment: 80 + 81 + movq %rdx,%rcx 82 + shrq $6,%rcx 83 + jz .Lhandle_tail 84 + 85 + .p2align 4 86 + .Lloop_64: 87 + decq %rcx 88 + movq %rax,(%rdi) 89 + movq %rax,8(%rdi) 90 + movq %rax,16(%rdi) 91 + movq %rax,24(%rdi) 92 + movq %rax,32(%rdi) 93 + movq %rax,40(%rdi) 94 + movq %rax,48(%rdi) 95 + movq %rax,56(%rdi) 96 + leaq 64(%rdi),%rdi 97 + jnz .Lloop_64 98 + 99 + /* Handle tail in loops. The loops should be faster than hard 100 + to predict jump tables. */ 101 + .p2align 4 102 + .Lhandle_tail: 103 + movl %edx,%ecx 104 + andl $63&(~7),%ecx 105 + jz .Lhandle_7 106 + shrl $3,%ecx 107 + .p2align 4 108 + .Lloop_8: 109 + decl %ecx 110 + movq %rax,(%rdi) 111 + leaq 8(%rdi),%rdi 112 + jnz .Lloop_8 113 + 114 + .Lhandle_7: 115 + andl $7,%edx 116 + jz .Lende 117 + .p2align 4 118 + .Lloop_1: 119 + decl %edx 120 + movb %al,(%rdi) 121 + leaq 1(%rdi),%rdi 122 + jnz .Lloop_1 123 + 124 + .Lende: 125 + movq %r10,%rax 126 + ret 127 + 128 + .Lbad_alignment: 129 + cmpq $7,%rdx 130 + jbe .Lhandle_7 131 + movq %rax,(%rdi) /* unaligned store */ 132 + movq $8,%r8 133 + subq %r9,%r8 134 + addq %r8,%rdi 135 + subq %r8,%rdx 136 + jmp .Lafter_bad_alignment 137 + .Lfinal: 138 + ENDPROC(memset_orig)
+6 -3
tools/perf/MANIFEST
··· 12 12 tools/arch/sparc/include/asm/barrier_64.h 13 13 tools/arch/tile/include/asm/barrier.h 14 14 tools/arch/x86/include/asm/barrier.h 15 + tools/arch/x86/include/asm/cpufeatures.h 16 + tools/arch/x86/include/asm/disabled-features.h 17 + tools/arch/x86/include/asm/required-features.h 18 + tools/arch/x86/lib/memcpy_64.S 19 + tools/arch/x86/lib/memset_64.S 15 20 tools/arch/xtensa/include/asm/barrier.h 16 21 tools/scripts 17 22 tools/build ··· 36 31 tools/lib/bitmap.c 37 32 tools/lib/str_error_r.c 38 33 tools/lib/vsprintf.c 34 + tools/include/asm/alternative-asm.h 39 35 tools/include/asm/atomic.h 40 36 tools/include/asm/barrier.h 41 37 tools/include/asm/bug.h ··· 80 74 arch/*/include/asm/unistd*.h 81 75 arch/*/include/uapi/asm/unistd*.h 82 76 arch/*/include/uapi/asm/perf_regs.h 83 - arch/*/lib/memcpy*.S 84 - arch/*/lib/memset*.S 85 - arch/*/include/asm/*features.h 86 77 include/linux/poison.h 87 78 include/linux/hw_breakpoint.h 88 79 include/uapi/linux/bpf.h
+15
tools/perf/Makefile.perf
··· 348 348 @(test -f ../../include/uapi/linux/perf_event.h && ( \ 349 349 (diff -B ../include/uapi/linux/perf_event.h ../../include/uapi/linux/perf_event.h >/dev/null) \ 350 350 || echo "Warning: tools/include/uapi/linux/perf_event.h differs from kernel" >&2 )) || true 351 + @(test -f ../../arch/x86/include/asm/disabled-features.h && ( \ 352 + (diff -B ../arch/x86/include/asm/disabled-features.h ../../arch/x86/include/asm/disabled-features.h >/dev/null) \ 353 + || echo "Warning: tools/arch/x86/include/asm/disabled-features.h differs from kernel" >&2 )) || true 354 + @(test -f ../../arch/x86/include/asm/required-features.h && ( \ 355 + (diff -B ../arch/x86/include/asm/required-features.h ../../arch/x86/include/asm/required-features.h >/dev/null) \ 356 + || echo "Warning: tools/arch/x86/include/asm/required-features.h differs from kernel" >&2 )) || true 357 + @(test -f ../../arch/x86/include/asm/cpufeatures.h && ( \ 358 + (diff -B ../arch/x86/include/asm/cpufeatures.h ../../arch/x86/include/asm/cpufeatures.h >/dev/null) \ 359 + || echo "Warning: tools/arch/x86/include/asm/cpufeatures.h differs from kernel" >&2 )) || true 360 + @(test -f ../../arch/x86/lib/memcpy_64.S && ( \ 361 + (diff -B ../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memcpy_64.S >/dev/null) \ 362 + || echo "Warning: tools/arch/x86/lib/memcpy_64.S differs from kernel" >&2 )) || true 363 + @(test -f ../../arch/x86/lib/memset_64.S && ( \ 364 + (diff -B ../arch/x86/lib/memset_64.S ../../arch/x86/lib/memset_64.S >/dev/null) \ 365 + || echo "Warning: tools/arch/x86/lib/memset_64.S differs from kernel" >&2 )) || true 351 366 $(Q)$(MAKE) $(build)=perf 352 367 353 368 $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
+1 -1
tools/perf/bench/mem-memcpy-x86-64-asm.S
··· 6 6 #define globl p2align 4; .globl 7 7 #define _ASM_EXTABLE_FAULT(x, y) 8 8 9 - #include "../../../arch/x86/lib/memcpy_64.S" 9 + #include "../../arch/x86/lib/memcpy_64.S" 10 10 /* 11 11 * We need to provide note.GNU-stack section, saying that we want 12 12 * NOT executable stack. Otherwise the final linking will assume that
+1 -1
tools/perf/bench/mem-memset-x86-64-asm.S
··· 1 1 #define memset MEMSET /* don't hide glibc's memset() */ 2 2 #define altinstr_replacement text 3 3 #define globl p2align 4; .globl 4 - #include "../../../arch/x86/lib/memset_64.S" 4 + #include "../../arch/x86/lib/memset_64.S" 5 5 6 6 /* 7 7 * We need to provide note.GNU-stack section, saying that we want
+2 -2
tools/perf/util/include/asm/alternative-asm.h tools/include/asm/alternative-asm.h
··· 1 - #ifndef _PERF_ASM_ALTERNATIVE_ASM_H 2 - #define _PERF_ASM_ALTERNATIVE_ASM_H 1 + #ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H 2 + #define _TOOLS_ASM_ALTERNATIVE_ASM_H 3 3 4 4 /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ 5 5